Skip to content

Commit f77eee3

Browse files
Initial commit
1 parent 0166712 commit f77eee3

237 files changed

Lines changed: 56463 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Encoder_Eval/README.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Encoder_Eval: A Unified Evaluation Suite for Video and Image Encoders
2+
3+
This repository provides a unified evaluation framework for benchmarking **video** and **image** encoders across diverse tasks, including **linear probing**, **attentive probing**, **dense segmentation**, and **object detection**.
4+
5+
---
6+
7+
## 📅 Project Progress
8+
9+
### ✅ Completed
10+
- [x] `video_attentive_probe`: Attention-based probing for video encoders
11+
- [x] `video_linear_probe`: Linear probing for video encoders
12+
13+
### ⬜ Upcoming / In Development
14+
- [ ] `image_attentive_probe`: Attention-based probing for image encoders
15+
- [ ] `image_linear_probe`: Linear probing for image encoders
16+
- [ ] `dense_segmentation`: Dense prediction benchmarking (image/video)
17+
- [ ] `object_detection`: Detection task evaluation (image/video)
18+
19+
---
20+
21+
## 💡 Key Features
22+
- Support for both **video** and **image** modalities.
23+
- Modular design for easy integration of new probing techniques.
24+
- Standardized evaluation pipelines for encoder representations.
25+
- Designed to benchmark both **frozen** and **fine-tuned** encoders.
26+
27+
## 🔧 Setup
28+
29+
```bash
30+
# Clone the repo
31+
git clone git@github.com:FeilongTangmonash/Encoder_Eval.git
32+
cd Encoder_Eval
33+
34+
# Install dependencies
35+
pip install -r requirements.txt
36+
```
37+
## 🧱 Code Structure
38+
39+
<pre>
40+
video_vit/
41+
└── video_encoder_eval/
42+
└── video_linear_probe/
43+
└── checkpoint/
44+
└── mlcd_base/
45+
└── backbone_base224.pt
46+
</pre>
47+
48+
49+
## 🚀 Usage
50+
We provide example scripts to perform a full evaluation of the UMT model using both the attentive probe and the linear probe methods. Simply run the commands below:
51+
```
52+
bash src/video_attentive_probe.sh
53+
bash src/video_linear_probe.sh
54+
```
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
export NUM_GPUS=8
2+
export NNODES=1
3+
export RANK=0
4+
export ADDR="127.0.0.1"
5+
export PORT="32509"
6+
#pt=pretrain ppt=post-pretrain ft=finetune
7+
8+
TRAIN_DATA_ROOT_PATH=/path/to/train/video
9+
TRAIN_DATA_CSV_PATH=/path/to/train/video
10+
VAL_DATA_ROOT_PATH=/path/to/val/video
11+
VAL_DATA_CSV_PATH=/path/to/val/csv
12+
OUTPUT=/path/to/output
13+
MODEL_NAME='umt'
14+
15+
FINETUNE=/path/to/ckpt
16+
model='vit_large_patch16_224'
17+
EMBEDDING_SIZE=1024
18+
PATCH_SIZE=16
19+
NUM_FRAMES=8
20+
INPUT_SIZE=224
21+
TUBELET_SIZE=1
22+
BATCH_SIZE=32
23+
24+
for SEED in 1
25+
do
26+
for DATASET in ssv2 k400 k600 k700 hmdb51 ucf101 epic_verb epic_noun perception_test diving48 CharadesEgo CharadesEgo_v1_only1st CharadesEgo_v1_only3rd
27+
do
28+
for NUM_SHOTS in 50
29+
do
30+
echo "SEED: $SEED"
31+
echo "DATASET: $DATASET"
32+
echo "NUM_SHOTS: $NUM_SHOTS"
33+
34+
FLASH=1 torchrun --nproc_per_node="${NUM_GPUS}" --nnodes="${NNODES}" \
35+
--node_rank="${RANK}" --master_addr="${ADDR}" --master_port="${PORT}" \
36+
ac_export_feature_and_attentive_probe.py \
37+
--embedding_size ${EMBEDDING_SIZE} \
38+
--data_set ${DATASET} \
39+
--seed ${SEED} \
40+
--num_shots ${NUM_SHOTS} \
41+
--num_step 8 \
42+
--train_data_root_path ${TRAIN_DATA_ROOT_PATH} \
43+
--train_data_csv_path ${TRAIN_DATA_CSV_PATH} \
44+
--val_data_root_path ${VAL_DATA_ROOT_PATH} \
45+
--val_data_csv_path ${VAL_DATA_CSV_PATH} \
46+
--save_report ${OUTPUT} \
47+
--batch_size ${BATCH_SIZE} \
48+
--model_name ${MODEL_NAME} \
49+
--model ${model} \
50+
--finetune ${FINETUNE} \
51+
--num_frames ${NUM_FRAMES} \
52+
--input_size ${INPUT_SIZE} \
53+
--tubelet_size ${TUBELET_SIZE} \
54+
--patch_size ${PATCH_SIZE}
55+
done
56+
done
57+
done
58+
59+
# Due to the small dataset size, the following dataset raises errors when using 8 GPUs with a large batch size.
60+
export NUM_GPUS=1
61+
export NNODES=1
62+
export RANK=0
63+
export ADDR="127.0.0.1"
64+
export PORT="32509"
65+
#pt=pretrain ppt=post-pretrain ft=finetune
66+
67+
TRAIN_DATA_ROOT_PATH=/path/to/train/video
68+
TRAIN_DATA_CSV_PATH=/path/to/train/video
69+
VAL_DATA_ROOT_PATH=/path/to/val/video
70+
VAL_DATA_CSV_PATH=/path/to/val/csv
71+
OUTPUT=/path/to/output
72+
MODEL_NAME='umt'
73+
74+
FINETUNE=/path/to/ckpt
75+
model='vit_large_patch16_224'
76+
EMBEDDING_SIZE=1024
77+
PATCH_SIZE=16
78+
NUM_FRAMES=8
79+
INPUT_SIZE=224
80+
TUBELET_SIZE=1
81+
BATCH_SIZE=32
82+
83+
for SEED in 1
84+
do
85+
for DATASET in RareAct Drone_Action
86+
do
87+
for NUM_SHOTS in 50
88+
do
89+
echo "SEED: $SEED"
90+
echo "DATASET: $DATASET"
91+
echo "NUM_SHOTS: $NUM_SHOTS"
92+
93+
FLASH=1 torchrun --nproc_per_node="${NUM_GPUS}" --nnodes="${NNODES}" \
94+
--node_rank="${RANK}" --master_addr="${ADDR}" --master_port="${PORT}" \
95+
ac_export_feature_and_attentive_probe.py \
96+
--embedding_size ${EMBEDDING_SIZE} \
97+
--data_set ${DATASET} \
98+
--seed ${SEED} \
99+
--num_shots ${NUM_SHOTS} \
100+
--num_step 8 \
101+
--train_data_root_path ${TRAIN_DATA_ROOT_PATH} \
102+
--train_data_csv_path ${TRAIN_DATA_CSV_PATH} \
103+
--val_data_root_path ${VAL_DATA_ROOT_PATH} \
104+
--val_data_csv_path ${VAL_DATA_CSV_PATH} \
105+
--save_report ${OUTPUT} \
106+
--batch_size ${BATCH_SIZE} \
107+
--model_name ${MODEL_NAME} \
108+
--model ${model} \
109+
--finetune ${FINETUNE} \
110+
--num_frames ${NUM_FRAMES} \
111+
--input_size ${INPUT_SIZE} \
112+
--tubelet_size ${TUBELET_SIZE} \
113+
--patch_size ${PATCH_SIZE}
114+
done
115+
done
116+
done
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/usr/bin/env bash
# Linear-probe evaluation launcher for the UMT video encoder.
# Iterates over seeds / datasets / shot counts and launches
# ac_export_feature_and_linear_probe.py via torchrun.
# Replace the /path/to/... placeholders before running.
set -euo pipefail

export NUM_GPUS=8
export NNODES=1
export RANK=0
export ADDR="127.0.0.1"
export PORT="32509"
# pt=pretrain  ppt=post-pretrain  ft=finetune

TRAIN_DATA_ROOT_PATH=/path/to/train/video
TRAIN_DATA_CSV_PATH=/path/to/train/video
VAL_DATA_ROOT_PATH=/path/to/val/video
VAL_DATA_CSV_PATH=/path/to/val/csv
OUTPUT=/path/to/output
MODEL_NAME='umt'

FINETUNE=/path/to/ckpt
model='vit_large_patch16_224'
EMBEDDING_SIZE=768
PATCH_SIZE=16
NUM_FRAMES=8
INPUT_SIZE=224
TUBELET_SIZE=1
BATCH_SIZE=32

#######################################
# Run one linear-probe evaluation.
# Globals:   all launcher settings above (read)
# Arguments: $1 - seed, $2 - dataset name, $3 - number of shots
#######################################
run_probe() {
  local seed=$1 dataset=$2 num_shots=$3
  echo "SEED: ${seed}"
  echo "DATASET: ${dataset}"
  echo "NUM_SHOTS: ${num_shots}"

  FLASH=1 torchrun --nproc_per_node="${NUM_GPUS}" --nnodes="${NNODES}" \
    --node_rank="${RANK}" --master_addr="${ADDR}" --master_port="${PORT}" \
    ac_export_feature_and_linear_probe.py \
    --embedding_size "${EMBEDDING_SIZE}" \
    --data_set "${dataset}" \
    --seed "${seed}" \
    --num_shots "${num_shots}" \
    --num_step 8 \
    --train_data_root_path "${TRAIN_DATA_ROOT_PATH}" \
    --train_data_csv_path "${TRAIN_DATA_CSV_PATH}" \
    --val_data_root_path "${VAL_DATA_ROOT_PATH}" \
    --val_data_csv_path "${VAL_DATA_CSV_PATH}" \
    --save_report "${OUTPUT}" \
    --batch_size "${BATCH_SIZE}" \
    --model_name "${MODEL_NAME}" \
    --model "${model}" \
    --finetune "${FINETUNE}" \
    --num_frames "${NUM_FRAMES}" \
    --input_size "${INPUT_SIZE}" \
    --tubelet_size "${TUBELET_SIZE}" \
    --patch_size "${PATCH_SIZE}"
}

for SEED in 1; do
  for DATASET in ssv2 k400 k600 k700 hmdb51 ucf101 epic_verb epic_noun \
      perception_test diving48 CharadesEgo CharadesEgo_v1_only1st \
      CharadesEgo_v1_only3rd; do
    for NUM_SHOTS in 50; do
      run_probe "${SEED}" "${DATASET}" "${NUM_SHOTS}"
    done
  done
done

# The following small datasets raise errors with 8 GPUs and a large batch
# size. BUGFIX: the original script stated this but never reduced the GPU
# count; drop to a single GPU here, matching video_attentive_probe.sh.
export NUM_GPUS=1

for SEED in 1; do
  for DATASET in RareAct Drone_Action; do
    # NOTE(review): this section uses 10 shots while the attentive-probe
    # script uses 50 for the same datasets — presumably intentional; confirm.
    for NUM_SHOTS in 10; do
      run_probe "${SEED}" "${DATASET}" "${NUM_SHOTS}"
    done
  done
done

0 commit comments

Comments
 (0)