Skip to content

Commit c490a73

Browse files
committed
updated
1 parent aa43232 commit c490a73

259 files changed

Lines changed: 1115 additions & 61153 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

eval_encoder/attentive_probe.py

Lines changed: 3 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -197,25 +197,13 @@ def video_to_images(videos: torch.Tensor) -> torch.Tensor:
197197
frame_indices,
198198
total_frames.view(-1),
199199
target_frames
200-
) # [B, seq_len]
201-
# ===> 创建 target_frames 帧的空白视频 <===
202-
padded_videos = torch.zeros(bs, C, target_frames, H, W, device=device, dtype=videos.dtype)
203-
204-
# ===> 将原始帧放入插值后的对应位置 <===
205-
seq_len = frame_indices.shape[1]
206-
207-
# 准备 scatter 的索引
208-
frame_idx_expanded = interpolated_indices.view(bs, 1, seq_len, 1, 1).expand(bs, C, seq_len, H, W)
209-
210-
# 将视频帧放入对应位置
211-
padded_videos.scatter_(dim=2, index=frame_idx_expanded, src=videos)
212-
200+
)
213201
# ===> 计算 visible_index (基于 target_frames) <===
214202
per = torch.arange(frame_tokens, device=device)
215203
visible_index = (interpolated_indices.unsqueeze(-1) * frame_tokens + per).reshape(bs, -1)
216204
visible_index = visible_index.clamp_max(target_frames * frame_tokens - 1)
217205

218-
enc_out = model(padded_videos, visible_index)
206+
enc_out = model(videos, visible_index)
219207
if hasattr(enc_out, "last_hidden_state"):
220208
outputs = enc_out.last_hidden_state
221209
else:
@@ -415,7 +403,7 @@ def get_model(args: argparse.Namespace) -> nn.Module:
415403

416404
if args.model_name == "hf_llava_vit_large_ln_auto":
417405
model = AutoModel.from_pretrained(
418-
"/video_vit/xiangan/LLaVA-ViT/ov-encoder-large",
406+
"/video_vit/xiangan/LLaVA-ViT/onevision-encoder-large",
419407
trust_remote_code=True,
420408
attn_implementation="flash_attention_2"
421409
)

eval_encoder/deprecated/eval_llava_vit_b16_mix_residual_frames.sh

Lines changed: 0 additions & 12 deletions
This file was deleted.

eval_encoder/deprecated/eval_llava_vit_b16_mix_residual_frames_tiling.sh

Lines changed: 0 additions & 23 deletions
This file was deleted.

eval_encoder/deprecated/eval_llava_vit_b16_mv_three_input.sh

Lines changed: 0 additions & 60 deletions
This file was deleted.

eval_encoder/deprecated/eval_llava_vit_b16_mv_three_input_final.sh

Lines changed: 0 additions & 61 deletions
This file was deleted.

eval_encoder/deprecated/eval_llava_vit_b16_residual.sh

Lines changed: 0 additions & 14 deletions
This file was deleted.

eval_encoder/deprecated/eval_llava_vit_b16_residual_tiling.sh

Lines changed: 0 additions & 20 deletions
This file was deleted.

0 commit comments

Comments
 (0)