1- """Evaluation script for the encoder line classifier .
1+ """Evaluation script for encoder and pooled line classifiers.
22
33Runs inference on an eval set and computes the same metrics as the generative
44model's evaluate.py for direct comparison:
99- ROUGE-L
1010- Compression ratio
1111
12+ Supports both classifier types (auto-detected from model config):
13+ - token: SqueezEncoderForLineClassification (token-level with [LINE_SEP])
14+ - pooled: PooledLineClassifier (line-level mean-pool with [LINE_SEP])
15+
1216Usage:
1317 python -m squeez.encoder.evaluate \
1418 --model-path output/squeez_encoder \
1519 --eval-file data/encoder_test.jsonl
20+
21+ python -m squeez.encoder.evaluate \
22+ --model-path output/squeez_pooled \
23+ --eval-file data/encoder_test.jsonl
1624"""
1725
1826from __future__ import annotations
2129import json
2230import logging
2331import statistics
32+ from pathlib import Path
2433
2534logger = logging .getLogger (__name__ )
2635
2736
def _load_model_and_tokenizer(model_path: str):
    """Load a line-classifier model and tokenizer from *model_path*.

    The classifier variant is auto-detected from the checkpoint's
    ``config.json``: ``model_type == "squeez-pooled"`` selects the pooled
    line classifier; anything else (or a missing config) falls back to the
    token-level encoder classifier.

    Args:
        model_path: Directory containing the trained model and tokenizer.

    Returns:
        Tuple ``(model, tokenizer, model_type)`` where ``model_type`` is
        ``"pooled"`` or ``"encoder"``. The model is moved to CUDA when
        available and put in eval mode.
    """
    # Heavy dependencies are imported lazily so importing this module stays
    # cheap; ``json`` and ``Path`` come from the module-level imports.
    import torch
    from transformers import AutoTokenizer

    from squeez.encoder.model import LINE_SEP_TOKEN

    config_path = Path(model_path) / "config.json"
    model_type = "encoder"
    if config_path.exists():
        with open(config_path) as f:
            cfg = json.load(f)
        if cfg.get("model_type") == "squeez-pooled":
            model_type = "pooled"

    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    # Fallback: make sure [LINE_SEP] is a known token. A tokenizer saved
    # alongside a trained checkpoint should already contain it.
    # NOTE(review): if the token really were missing, the model's embedding
    # matrix is NOT resized here — assumes saved checkpoints already include
    # the token; confirm against the training script.
    if tokenizer.convert_tokens_to_ids(LINE_SEP_TOKEN) == tokenizer.unk_token_id:
        tokenizer.add_special_tokens({"additional_special_tokens": [LINE_SEP_TOKEN]})

    if model_type == "pooled":
        from squeez.encoder.sentence import PooledLineClassifier

        model = PooledLineClassifier.from_pretrained(model_path, trust_remote_code=True)
    else:
        from squeez.encoder.model import SqueezEncoderForLineClassification

        model = SqueezEncoderForLineClassification.from_pretrained(
            model_path, trust_remote_code=True
        )

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    model.eval()

    return model, tokenizer, model_type
74+
75+
2876def evaluate_encoder (
2977 model_path : str ,
3078 eval_file : str ,
3179 max_samples : int | None = None ,
3280 threshold : float = 0.5 ,
3381 examples_output : str | None = None ,
3482) -> dict :
35- """Evaluate the encoder model on an eval set.
83+ """Evaluate an encoder or pooled model on an eval set.
84+
85+ Auto-detects model type from config.json (squeez-encoder vs squeez-pooled).
3686
3787 Args:
38- model_path: Path to trained encoder model
88+ model_path: Path to trained model
3989 eval_file: Path to encoder-format JSONL
4090 max_samples: Maximum samples to evaluate
4191 threshold: Relevance score threshold
4292
4393 Returns:
4494 Dict with aggregate metrics (same format as generative evaluate.py)
4595 """
46- import torch
47- from transformers import AutoTokenizer
48-
49- from squeez .encoder .model import LINE_SEP_TOKEN , SqueezEncoderForLineClassification
5096 from squeez .training .evaluate import (
5197 compute_compression_ratio ,
5298 compute_empty_accuracy ,
@@ -56,17 +102,8 @@ def evaluate_encoder(
56102 compute_span_metrics ,
57103 )
58104
59- logger .info (f"Loading encoder model from { model_path } " )
60- tokenizer = AutoTokenizer .from_pretrained (model_path , trust_remote_code = True )
61-
62- # Ensure LINE_SEP is in tokenizer
63- if tokenizer .convert_tokens_to_ids (LINE_SEP_TOKEN ) == tokenizer .unk_token_id :
64- tokenizer .add_special_tokens ({"additional_special_tokens" : [LINE_SEP_TOKEN ]})
65-
66- model = SqueezEncoderForLineClassification .from_pretrained (model_path , trust_remote_code = True )
67- device = "cuda" if torch .cuda .is_available () else "cpu"
68- model = model .to (device )
69- model .eval ()
105+ logger .info (f"Loading model from { model_path } " )
106+ model , tokenizer , model_type = _load_model_and_tokenizer (model_path )
70107
71108 # Load eval data
72109 samples = []
@@ -189,7 +226,7 @@ def evaluate_encoder(
189226
190227 results ["empty_confusion" ] = empty_confusion
191228 results ["num_samples" ] = len (samples )
192- results ["model_type" ] = "encoder"
229+ results ["model_type" ] = model_type
193230 results ["threshold" ] = threshold
194231
195232 if examples_output :
@@ -198,7 +235,7 @@ def evaluate_encoder(
198235 logger .info (f"Saved per-sample examples to { examples_output } " )
199236
200237 logger .info ("=" * 60 )
201- logger .info ("ENCODER EVALUATION RESULTS" )
238+ logger .info (f" EVALUATION RESULTS ( { model_type } ) " )
202239 logger .info ("=" * 60 )
203240 for key , stats in results .items ():
204241 if isinstance (stats , dict ) and "mean" in stats :
0 commit comments