From 5321e50ba86f29f4609e978a2defc0220464bba7 Mon Sep 17 00:00:00 2001
From: akawincent
Date: Fri, 27 Mar 2026 20:41:37 +0800
Subject: [PATCH 1/2] Fix vLLM top_p=0 handling

---
 lmms_eval/models/chat/vllm.py            |  7 +-
 lmms_eval/models/chat/vllm_generate.py   |  7 +-
 lmms_eval/models/simple/vllm.py          | 27 ++++++--
 test/models/test_vllm_sampling_params.py | 85 ++++++++++++++++++++++++
 4 files changed, 107 insertions(+), 19 deletions(-)
 create mode 100644 test/models/test_vllm_sampling_params.py

diff --git a/lmms_eval/models/chat/vllm.py b/lmms_eval/models/chat/vllm.py
index 24bc1099c..346533eae 100644
--- a/lmms_eval/models/chat/vllm.py
+++ b/lmms_eval/models/chat/vllm.py
@@ -69,12 +69,7 @@ def make_one_request(self, request: Instance) -> Tuple[list[dict], dict]:
         _gen["max_new_tokens"] = self._select_max_new_tokens(_gen.get("max_new_tokens"))
         _gen.setdefault("temperature", 0)
         _gen.setdefault("top_p", 0.95)
-
-        params = {
-            "temperature": _gen["temperature"],
-            "max_tokens": _gen["max_new_tokens"],
-            "top_p": _gen["top_p"],
-        }
+        params = self._build_sampling_params_dict(_gen)
 
         video_kwargs = {
             "max_pixels": self.max_pixels,
diff --git a/lmms_eval/models/chat/vllm_generate.py b/lmms_eval/models/chat/vllm_generate.py
index 2b4e289f0..9b71d23ac 100644
--- a/lmms_eval/models/chat/vllm_generate.py
+++ b/lmms_eval/models/chat/vllm_generate.py
@@ -90,12 +90,7 @@ def make_one_request(self, request: Instance) -> Tuple[list[dict], dict]:
         _gen["max_new_tokens"] = self._select_max_new_tokens(_gen.get("max_new_tokens"))
         _gen.setdefault("temperature", 0)
         _gen.setdefault("top_p", 0.95)
-
-        params = {
-            "temperature": _gen["temperature"],
-            "max_tokens": _gen["max_new_tokens"],
-            "top_p": _gen["top_p"],
-        }
+        params = self._build_sampling_params_dict(_gen)
 
         video_kwargs = {
             "max_pixels": self.max_pixels,
diff --git a/lmms_eval/models/simple/vllm.py b/lmms_eval/models/simple/vllm.py
index 2a83c633c..22946997e 100644
--- a/lmms_eval/models/simple/vllm.py
+++ b/lmms_eval/models/simple/vllm.py
@@ -337,6 +337,25 @@ def _select_max_new_tokens(self, request_max_new_tokens: Any) -> int:
             return self.max_new_tokens
         return max(request_max_new_tokens, self.max_new_tokens)
 
+    @staticmethod
+    def _normalize_top_p_for_vllm(top_p: Any) -> Any:
+        if isinstance(top_p, bool):
+            return top_p
+        try:
+            numeric_top_p = float(top_p)
+        except (TypeError, ValueError):
+            return top_p
+        if numeric_top_p == 0.0:
+            return 1.0
+        return top_p
+
+    def _build_sampling_params_dict(self, gen_kwargs: dict[str, Any]) -> dict[str, Any]:
+        return {
+            "max_tokens": gen_kwargs["max_new_tokens"],
+            "temperature": gen_kwargs["temperature"],
+            "top_p": self._normalize_top_p_for_vllm(gen_kwargs["top_p"]),
+        }
+
     def _run_tp_synced(
         self,
         local_inputs: list[Any],
@@ -456,13 +475,7 @@ def generate_until(self, requests) -> List[str]:
             gen_kwargs["max_new_tokens"] = self._select_max_new_tokens(gen_kwargs.get("max_new_tokens"))
             gen_kwargs.setdefault("temperature", 0)
             gen_kwargs.setdefault("top_p", 0.95)
-
-            params = {
-                "max_tokens": gen_kwargs["max_new_tokens"],
-                "temperature": gen_kwargs["temperature"],
-                "top_p": gen_kwargs["top_p"],
-            }
-            sampling_params = SamplingParams(**params)
+            sampling_params = SamplingParams(**self._build_sampling_params_dict(gen_kwargs))
 
             visuals = [doc_to_visual(self.task_dict[task][split][doc_id])]
             if None in visuals:
diff --git a/test/models/test_vllm_sampling_params.py b/test/models/test_vllm_sampling_params.py
new file mode 100644
index 000000000..0e061ecee
--- /dev/null
+++ b/test/models/test_vllm_sampling_params.py
@@ -0,0 +1,85 @@
+import types
+import unittest
+from unittest.mock import patch
+
+from lmms_eval.models.chat.vllm import VLLM as ChatVLLM
+from lmms_eval.models.chat.vllm_generate import VLLMGenerate
+from lmms_eval.models.simple.vllm import VLLM as SimpleVLLM
+
+
+class _FakeChatMessages:
+    def __init__(self, messages):
+        self.messages = messages
+
+    def to_openai_messages(self, video_kwargs=None):
+        return [{"role": "user", "content": [{"type": "text", "text": "Describe the input"}]}]
+
+    def to_hf_messages(self, video_kwargs=None):
+        return [{"role": "user", "content": [{"type": "text", "text": "Describe the input"}]}]
+
+    def extract_media(self):
+        return [], [], []
+
+
+class _FakeProcessor:
+    def apply_chat_template(self, messages, tokenize=False, add_generation_prompt=True):
+        return "prompt"
+
+
+class TestVllmSamplingParams(unittest.TestCase):
+    def _make_simple_model(self):
+        model = SimpleVLLM.__new__(SimpleVLLM)
+        model.max_new_tokens = 1024
+        return model
+
+    def _make_chat_model(self, cls):
+        model = cls.__new__(cls)
+        model.max_new_tokens = 1024
+        model.max_pixels = 1024
+        model.min_image_pixels = 28
+        model.max_frame_num = 32
+        model.fps = None
+        model.nframes = 8
+        model.processor = _FakeProcessor()
+        model.task_dict = {"demo_task": {"test": [{"id": 0}]}}
+        return model
+
+    def test_build_sampling_params_clamps_zero_top_p(self):
+        model = self._make_simple_model()
+
+        params = model._build_sampling_params_dict({"max_new_tokens": 128, "temperature": 0, "top_p": 0})
+
+        self.assertEqual(params["top_p"], 1.0)
+
+    def test_build_sampling_params_preserves_valid_top_p(self):
+        model = self._make_simple_model()
+
+        params = model._build_sampling_params_dict({"max_new_tokens": 128, "temperature": 0, "top_p": 0.95})
+
+        self.assertEqual(params["top_p"], 0.95)
+
+    def test_chat_make_one_request_clamps_zero_top_p(self):
+        model = self._make_chat_model(ChatVLLM)
+        request = types.SimpleNamespace(
+            arguments=("Describe the input", lambda doc: [{"role": "user", "content": []}], {"temperature": 0, "top_p": 0}, 0, "demo_task", "test"),
+        )
+
+        with patch("lmms_eval.models.chat.vllm.ChatMessages", _FakeChatMessages):
+            _, params = model.make_one_request(request)
+
+        self.assertEqual(params["top_p"], 1.0)
+
+    def test_generate_make_one_request_clamps_zero_top_p(self):
+        model = self._make_chat_model(VLLMGenerate)
+        request = types.SimpleNamespace(
+            arguments=("Describe the input", lambda doc: [{"role": "user", "content": []}], {"temperature": 0, "top_p": 0}, 0, "demo_task", "test"),
+        )
+
+        with patch("lmms_eval.models.chat.vllm_generate.ChatMessages", _FakeChatMessages):
+            _, params = model.make_one_request(request)
+
+        self.assertEqual(params["top_p"], 1.0)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 734f00c3514a8b976e34528d0bff246fd7ac1367 Mon Sep 17 00:00:00 2001
From: wincent <3511606256@qq.com>
Date: Thu, 9 Apr 2026 21:45:57 +0800
Subject: [PATCH 2/2] remove test file

---
 test/models/test_vllm_sampling_params.py | 85 ------------------------
 1 file changed, 85 deletions(-)
 delete mode 100644 test/models/test_vllm_sampling_params.py

diff --git a/test/models/test_vllm_sampling_params.py b/test/models/test_vllm_sampling_params.py
deleted file mode 100644
index 0e061ecee..000000000
--- a/test/models/test_vllm_sampling_params.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import types
-import unittest
-from unittest.mock import patch
-
-from lmms_eval.models.chat.vllm import VLLM as ChatVLLM
-from lmms_eval.models.chat.vllm_generate import VLLMGenerate
-from lmms_eval.models.simple.vllm import VLLM as SimpleVLLM
-
-
-class _FakeChatMessages:
-    def __init__(self, messages):
-        self.messages = messages
-
-    def to_openai_messages(self, video_kwargs=None):
-        return [{"role": "user", "content": [{"type": "text", "text": "Describe the input"}]}]
-
-    def to_hf_messages(self, video_kwargs=None):
-        return [{"role": "user", "content": [{"type": "text", "text": "Describe the input"}]}]
-
-    def extract_media(self):
-        return [], [], []
-
-
-class _FakeProcessor:
-    def apply_chat_template(self, messages, tokenize=False, add_generation_prompt=True):
-        return "prompt"
-
-
-class TestVllmSamplingParams(unittest.TestCase):
-    def _make_simple_model(self):
-        model = SimpleVLLM.__new__(SimpleVLLM)
-        model.max_new_tokens = 1024
-        return model
-
-    def _make_chat_model(self, cls):
-        model = cls.__new__(cls)
-        model.max_new_tokens = 1024
-        model.max_pixels = 1024
-        model.min_image_pixels = 28
-        model.max_frame_num = 32
-        model.fps = None
-        model.nframes = 8
-        model.processor = _FakeProcessor()
-        model.task_dict = {"demo_task": {"test": [{"id": 0}]}}
-        return model
-
-    def test_build_sampling_params_clamps_zero_top_p(self):
-        model = self._make_simple_model()
-
-        params = model._build_sampling_params_dict({"max_new_tokens": 128, "temperature": 0, "top_p": 0})
-
-        self.assertEqual(params["top_p"], 1.0)
-
-    def test_build_sampling_params_preserves_valid_top_p(self):
-        model = self._make_simple_model()
-
-        params = model._build_sampling_params_dict({"max_new_tokens": 128, "temperature": 0, "top_p": 0.95})
-
-        self.assertEqual(params["top_p"], 0.95)
-
-    def test_chat_make_one_request_clamps_zero_top_p(self):
-        model = self._make_chat_model(ChatVLLM)
-        request = types.SimpleNamespace(
-            arguments=("Describe the input", lambda doc: [{"role": "user", "content": []}], {"temperature": 0, "top_p": 0}, 0, "demo_task", "test"),
-        )
-
-        with patch("lmms_eval.models.chat.vllm.ChatMessages", _FakeChatMessages):
-            _, params = model.make_one_request(request)
-
-        self.assertEqual(params["top_p"], 1.0)
-
-    def test_generate_make_one_request_clamps_zero_top_p(self):
-        model = self._make_chat_model(VLLMGenerate)
-        request = types.SimpleNamespace(
-            arguments=("Describe the input", lambda doc: [{"role": "user", "content": []}], {"temperature": 0, "top_p": 0}, 0, "demo_task", "test"),
-        )
-
-        with patch("lmms_eval.models.chat.vllm_generate.ChatMessages", _FakeChatMessages):
-            _, params = model.make_one_request(request)
-
-        self.assertEqual(params["top_p"], 1.0)
-
-
-if __name__ == "__main__":
-    unittest.main()