From 94af43e02f35f0621ea1c72b167729048d26e6f0 Mon Sep 17 00:00:00 2001 From: Tai An Date: Thu, 4 Jun 2026 00:08:06 -0700 Subject: [PATCH] fix(classify): honor raw_scores by returning logits and applying softmax conditionally The text-classification pipeline applies softmax internally, so the `/classify` endpoint always returned softmax probabilities regardless of the `raw_scores` flag (the post-processing block was a no-op). Pass function_to_apply="none" so the model emits raw logits, then apply softmax in the batch handler only when raw_scores is False, mirroring the rerank path. Fixes #658 --- .../infinity_emb/inference/batch_handler.py | 13 +++++++++---- .../infinity_emb/transformer/classifier/optimum.py | 4 ++-- .../infinity_emb/transformer/classifier/torch.py | 10 ++++++++-- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/libs/infinity_emb/infinity_emb/inference/batch_handler.py b/libs/infinity_emb/infinity_emb/inference/batch_handler.py index 1cb48aa9..d0acc9b9 100644 --- a/libs/infinity_emb/infinity_emb/inference/batch_handler.py +++ b/libs/infinity_emb/infinity_emb/inference/batch_handler.py @@ -244,9 +244,14 @@ async def classify( items = [PredictSingle(sentence=s) for s in sentences] classifications, usage = await self._schedule(items) - if raw_scores: - # perform softmax on scores - pass + if not raw_scores: + # the model returns raw logits; convert them to probabilities + for prediction in classifications: + logits = np.array([label["score"] for label in prediction]) + exp = np.exp(logits - logits.max()) + probs = exp / exp.sum() + for label, prob in zip(prediction, probs): + label["score"] = float(prob) return classifications, usage @@ -621,4 +626,4 @@ def _postprocess_batch(self): self._postprocess_queue.task_done() except Exception as ex: logger.exception(ex) - raise ValueError("Postprocessor crashed") + raise ValueError("Postprocessor crashed") \ No newline at end of file diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py index 9c2c78d1..592acaaa 100644 --- a/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py +++ b/libs/infinity_emb/infinity_emb/transformer/classifier/optimum.py @@ -70,7 +70,7 @@ def encode_pre(self, sentences: list[str]): return sentences def encode_core(self, sentences: list[str]) -> dict: - outputs = self._pipe(sentences) + outputs = self._pipe(sentences, function_to_apply="none") return outputs def encode_post(self, classes) -> dict[str, float]: @@ -86,4 +86,4 @@ def tokenize_lengths(self, sentences: list[str]) -> list[int]: return_attention_mask=False, return_length=False, ).encodings - return [len(t.tokens) for t in tks] + return [len(t.tokens) for t in tks] \ No newline at end of file diff --git a/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py b/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py index 3c9e7045..624bed20 100644 --- a/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py +++ b/libs/infinity_emb/infinity_emb/transformer/classifier/torch.py @@ -73,7 +73,13 @@ def encode_pre(self, sentences: list[str]): def encode_core(self, features): """runs plain inference, on cpu/gpu""" - return self._pipe(features, batch_size=256, truncation=True, padding=True) + return self._pipe( + features, + batch_size=256, + truncation=True, + padding=True, + function_to_apply="none", + ) def encode_post(self, classes) -> dict[str, float]: """runs post encoding such as normalization""" @@ -88,4 +94,4 @@ def tokenize_lengths(self, sentences: list[str]) -> list[int]: return_attention_mask=False, return_length=False, ).encodings - return [len(t.tokens) for t in tks] + return [len(t.tokens) for t in tks] \ No newline at end of file