michaelfeil · Anai-Guo · Jun 4, 2026 · gemini-code-assist · Jun 4, 2026 · chatgpt-codex-connector
diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py
@@ -244,9 +244,14 @@ async def classify(
         items = [PredictSingle(sentence=s) for s in sentences]
         classifications, usage = await self._schedule(items)
 
-        if raw_scores:
-            # perform softmax on scores
-            pass
+        if not raw_scores:
+            # the model returns raw logits; convert them to probabilities
+            for prediction in classifications:
+                logits = np.array([label["score"] for label in prediction])
+                exp = np.exp(logits - logits.max())
+                probs = exp / exp.sum()
+                for label, prob in zip(prediction, probs):
+                    label["score"] = float(prob)
 
         return classifications, usage
 
@@ -621,4 +626,4 @@ def _postprocess_batch(self):
                 self._postprocess_queue.task_done()
         except Exception as ex:
             logger.exception(ex)
-            raise ValueError("Postprocessor crashed")
+            raise ValueError("Postprocessor crashed")
diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py
@@ -70,7 +70,7 @@ def encode_pre(self, sentences: list[str]):
         return sentences
 
     def encode_core(self, sentences: list[str]) -> dict:
-        outputs = self._pipe(sentences)
+        outputs = self._pipe(sentences, function_to_apply="none")
         return outputs
 
     def encode_post(self, classes) -> dict[str, float]:
@@ -86,4 +86,4 @@ def tokenize_lengths(self, sentences: list[str]) -> list[int]:
             return_attention_mask=False,
             return_length=False,
         ).encodings
-        return [len(t.tokens) for t in tks]
+        return [len(t.tokens) for t in tks]
diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py b/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py
@@ -73,7 +73,13 @@ def encode_pre(self, sentences: list[str]):
 
     def encode_core(self, features):
         """runs plain inference, on cpu/gpu"""
-        return self._pipe(features, batch_size=256, truncation=True, padding=True)
+        return self._pipe(
+            features,
+            batch_size=256,
+            truncation=True,
+            padding=True,
+            function_to_apply="none",
+        )
 
     def encode_post(self, classes) -> dict[str, float]:
         """runs post encoding such as normalization"""
@@ -88,4 +94,4 @@ def tokenize_lengths(self, sentences: list[str]) -> list[int]:
             return_attention_mask=False,
             return_length=False,
         ).encodings
-        return [len(t.tokens) for t in tks]
+        return [len(t.tokens) for t in tks]
-        return [len(t.tokens) for t in tks]
+        return [len(t.tokens) for t in tks]
-        return [len(t.tokens) for t in tks]
+        return [len(t.tokens) for t in tks]