From fa366a8314097fde1fb7f184143b79a5b0d22505 Mon Sep 17 00:00:00 2001
From: akawincent
Date: Sat, 28 Mar 2026 14:31:17 +0800
Subject: [PATCH 1/2] fix: enable frame sampling in internvl_hf

---
 lmms_eval/models/chat/internvl_hf.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lmms_eval/models/chat/internvl_hf.py b/lmms_eval/models/chat/internvl_hf.py
index e4f73f155..a8d26f004 100644
--- a/lmms_eval/models/chat/internvl_hf.py
+++ b/lmms_eval/models/chat/internvl_hf.py
@@ -249,6 +249,9 @@ def _collate(x):
             images_kwargs["min_patches"] = self.min_patches
         if self.max_patches is not None:
             images_kwargs["max_patches"] = self.max_patches
+        if self.num_frames is not None or self.fps is not None:
+            # InternVL only applies num_frames/fps when frame sampling is explicitly enabled.
+            videos_kwargs["do_sample_frames"] = True
         if self.num_frames is not None:
             videos_kwargs["num_frames"] = self.num_frames
         if self.fps is not None:

From 24b7ea9ddd28465ff36ab1dead768c02b7941141 Mon Sep 17 00:00:00 2001
From: akawincent
Date: Sat, 28 Mar 2026 14:31:17 +0800
Subject: [PATCH 2/2] fix: handle video-only internvl_hf inputs

---
 lmms_eval/models/chat/internvl_hf.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lmms_eval/models/chat/internvl_hf.py b/lmms_eval/models/chat/internvl_hf.py
index a8d26f004..ba3298782 100644
--- a/lmms_eval/models/chat/internvl_hf.py
+++ b/lmms_eval/models/chat/internvl_hf.py
@@ -263,6 +263,8 @@ def _collate(x):
         if self.accelerator.is_main_process and doc_id[0] % 100 == 0:
             eval_logger.debug(f"Prompt for doc ID {doc_id[0]}:\n\n{text}\n")

+        if len(visuals) == 0:
+            visuals = None
         if len(videos) == 0:
             videos = None
         inputs = self.processor(
@@ -278,7 +280,7 @@ def _collate(x):
         # this is safe to assume because the `grouper` object ensures it.
         gen_kwargs = all_gen_kwargs[0]
-        gen_kwargs["image_sizes"] = [visuals[idx].size for idx in range(len(visuals))]
+        gen_kwargs["image_sizes"] = [visual.size for visual in visuals] if visuals is not None else []
         if "max_new_tokens" not in gen_kwargs:
             gen_kwargs["max_new_tokens"] = 1024
         if "temperature" not in gen_kwargs: