Visual-Regression-Tracker
diff --git a/package-lock.json
Lines changed: 11 additions & 1 deletion b/package-lock.json
Lines changed: 11 additions & 1 deletion
diff --git a/package.json
Lines changed: 2 additions & 1 deletion b/package.json
Lines changed: 2 additions & 1 deletion
diff --git a/src/compare/libs/vlm/README.md
Lines changed: 37 additions & 30 deletions b/src/compare/libs/vlm/README.md
Lines changed: 37 additions & 30 deletions
diff --git a/src/compare/libs/vlm/ollama.service.spec.ts
Lines changed: 2 additions & 2 deletions b/src/compare/libs/vlm/ollama.service.spec.ts
Lines changed: 2 additions & 2 deletions
@@ -64,7 +64,8 @@
     "rimraf": "^5.0.1",
     "rxjs": "^7.8.2",
     "swagger-ui-express": "^4.6.3",
-    "uuid-apikey": "^1.5.3"
+    "uuid-apikey": "^1.5.3",
+    "zod": "^4.2.1"
   },
   "devDependencies": {
     "@darraghor/eslint-plugin-nestjs-typed": "^6.9.3",
 
@@ -1,6 +1,30 @@
 # VLM (Vision Language Model) Image Comparison
 
-AI-powered semantic image comparison using Vision Language Models via Ollama.
+Hybrid image comparison combining pixelmatch for objective difference detection and Vision Language Models (via Ollama) for human-noticeability analysis.
+
+## Architecture Flow
+
+```text
+VLM Comparison Request
+         │
+         ▼
+Run Pixelmatch Comparison
+         │
+         ├─→ No Differences Found → Return OK Status
+         │
+         └─→ Differences Found
+                  │
+                  ▼
+            Save Diff Image
+                  │
+                  ▼
+    Run VLM with 3 Images:
+    (Baseline, Comparison, Diff)
+                  │
+                  ├─→ Not Noticeable → Override: Return OK Status
+                  │
+                  └─→ Noticeable → Return Unresolved with VLM Description
+```
 
 ## Quick Start
 
@@ -18,10 +42,9 @@ ollama serve
 
 ```bash
 # Recommended for accuracy
-ollama pull llava:7b
+ollama pull gemma3:12b
 
-# Or for speed (smaller, less accurate)
-ollama pull moondream
+# Note: smaller models do not produce reliable results - use gemma3:12b (or llava:13b)
 ```
 
 ### 3. Configure Backend
@@ -36,60 +59,44 @@ OLLAMA_BASE_URL=http://localhost:11434
 Set project's image comparison to `vlm` with config:
 ```json
 {
-  "model": "llava:7b",
+  "model": "gemma3:12b",
   "temperature": 0.1
 }
 ```
 
 Optional custom prompt (replaces default system prompt):
 ```json
 {
-  "model": "llava:7b",
+  "model": "gemma3:12b",
   "prompt": "Focus on button colors and text changes",
   "temperature": 0.1
 }
 ```
 
-**Note:** The `prompt` field replaces the entire system prompt. If omitted, a default system prompt is used that focuses on semantic differences while ignoring rendering artifacts.
+**Note:** The `prompt` field replaces the entire system prompt. If omitted, a default system prompt is used that analyzes the diff image to determine if highlighted differences are noticeable to humans.
 
 ## Recommended Models
 
-| Model | Size | Speed | Accuracy | Best For |
-|-------|------|-------|----------|----------|
-| `llava:7b` | 4.7GB | ⚡⚡ | ⭐⭐⭐ | **Recommended** - best balance (minimal) |
-| `gemma3:latest` | ~ | ⚡⚡ | ⭐⭐⭐ | Minimal model option |
-| `llava:13b` | 8GB | ⚡ | ⭐⭐⭐⭐ | Best accuracy |
-| `moondream` | 1.7GB | ⚡⚡⚡ | ⭐⭐ | Fast, may hallucinate |
-| `minicpm-v` | 5.5GB | ⚡⚡ | ⭐⭐⭐ | Good alternative |
+| Model | Size | Notes |
+|-------|------|-------|
+| `gemma3:12b` | ~12GB | **Recommended** |
+
+**Note:** Models smaller than the default (`gemma3:12b`) have been tested and do not produce reliable results: they fail to follow structured output formats consistently and may return incorrect or inconsistent responses. For production use, use only `gemma3:12b` or `llava:13b`.
 
 ## Configuration
 
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
-| `model` | string | `llava:7b` | Ollama vision model name |
+| `model` | string | `gemma3:12b` | Ollama vision model name |
 | `prompt` | string | System prompt (see below) | Custom prompt for image comparison |
 | `temperature` | number | `0.1` | Lower = more consistent results (0.0-1.0) |
 
-## How It Works
-
-1. VLM analyzes both images semantically
-2. Returns JSON with `{"identical": true/false, "description": "..."}` 
-3. `identical: true` = images match (pass), `identical: false` = differences found (fail)
-4. Ignores technical differences (anti-aliasing, shadows, 1-2px shifts)
-5. Provides description of differences found
-
-### Default System Prompt
-
-The default prompt instructs the model to:
-- **CHECK** for: data changes, missing/added elements, state changes, structural differences
-- **IGNORE**: rendering artifacts, anti-aliasing, shadows, minor pixel shifts
-
 ## API Endpoints
 
 ```bash
 # List available models
 GET /ollama/models
 
 # Compare two images (for testing)
-POST /ollama/compare?model=llava:7b&prompt=<prompt>&temperature=0.1
+POST /ollama/compare?model=gemma3:12b&prompt=<prompt>&temperature=0.1
 ```
@@ -16,7 +16,6 @@ jest.mock('ollama', () => {
   };
 });
 
-
 describe('OllamaService', () => {
   let service: OllamaService;
 
@@ -102,7 +101,8 @@ describe('OllamaService', () => {
       mockChat.mockResolvedValue(mockResponse);
 
       // Use a longer base64 string
-      const longBase64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
+      const longBase64 =
+        'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
       const result = await service.generate({
         model: 'llava',
         messages: [