From 94af43e02f35f0621ea1c72b167729048d26e6f0 Mon Sep 17 00:00:00 2001
From: Tai An <antai12232931@outlook.com>
Date: Thu, 4 Jun 2026 00:08:06 -0700
Subject: [PATCH] fix(classify): honor raw_scores by returning logits and
 applying softmax conditionally

The text-classification pipeline applies softmax internally by default, so the `/classify` endpoint always returned softmax probabilities regardless of the `raw_scores` flag (the post-processing block was a no-op). Pass `function_to_apply="none"` so the pipeline returns raw logits, then apply softmax in the batch handler only when `raw_scores` is False, mirroring the rerank path.

Fixes #658
---
 .../infinity_emb/inference/batch_handler.py         | 13 +++++++++----
 .../infinity_emb/transformer/classifier/optimum.py  |  4 ++--
 .../infinity_emb/transformer/classifier/torch.py    | 10 ++++++++--
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py
index 1cb48aa9..d0acc9b9 100644
--- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py
+++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py
@@ -244,9 +244,14 @@ async def classify(
         items = [PredictSingle(sentence=s) for s in sentences]
         classifications, usage = await self._schedule(items)
 
-        if raw_scores:
-            # perform softmax on scores
-            pass
+        if not raw_scores:
+            # the model returns raw logits; convert them to probabilities
+            for prediction in classifications:
+                logits = np.array([label["score"] for label in prediction])
+                exp = np.exp(logits - logits.max())
+                probs = exp / exp.sum()
+                for label, prob in zip(prediction, probs):
+                    label["score"] = float(prob)
 
         return classifications, usage
 
@@ -621,4 +626,4 @@ def _postprocess_batch(self):
                 self._postprocess_queue.task_done()
         except Exception as ex:
             logger.exception(ex)
-            raise ValueError("Postprocessor crashed")
+            raise ValueError("Postprocessor crashed")
\ No newline at end of file
diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py
index 9c2c78d1..592acaaa 100644
--- a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py
@@ -70,7 +70,7 @@ def encode_pre(self, sentences: list[str]):
         return sentences
 
     def encode_core(self, sentences: list[str]) -> dict:
-        outputs = self._pipe(sentences)
+        outputs = self._pipe(sentences, function_to_apply="none")
         return outputs
 
     def encode_post(self, classes) -> dict[str, float]:
@@ -86,4 +86,4 @@ def tokenize_lengths(self, sentences: list[str]) -> list[int]:
             return_attention_mask=False,
             return_length=False,
         ).encodings
-        return [len(t.tokens) for t in tks]
+        return [len(t.tokens) for t in tks]
\ No newline at end of file
diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py b/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py
index 3c9e7045..624bed20 100644
--- a/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py
+++ b/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py
@@ -73,7 +73,13 @@ def encode_pre(self, sentences: list[str]):
 
     def encode_core(self, features):
         """runs plain inference, on cpu/gpu"""
-        return self._pipe(features, batch_size=256, truncation=True, padding=True)
+        return self._pipe(
+            features,
+            batch_size=256,
+            truncation=True,
+            padding=True,
+            function_to_apply="none",
+        )
 
     def encode_post(self, classes) -> dict[str, float]:
         """runs post encoding such as normalization"""
@@ -88,4 +94,4 @@ def tokenize_lengths(self, sentences: list[str]) -> list[int]:
             return_attention_mask=False,
             return_length=False,
         ).encodings
-        return [len(t.tokens) for t in tks]
+        return [len(t.tokens) for t in tks]
\ No newline at end of file