Skip to content

Commit b4fbc1a

Browse files
committed
Remove sensitive info and large files (>10MB)
- Shell script: replace hardcoded NCCL settings, hostnames, and master_addr with environment variables
- Python dataset: replace hardcoded data paths with the DATA_ROOT environment variable
- Delete large GIF files: case4.gif, case5.gif, case6.gif (~49 MB total)
1 parent 2908a86 commit b4fbc1a

2 files changed

Lines changed: 16 additions & 112 deletions

File tree

dataset/dataset_llava_vit.py

Lines changed: 5 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -9,98 +9,15 @@
99

1010
logger = logging.getLogger(__file__)
1111

12-
list_coyo = [
13-
"/vlm/data/coyo400m_part1/coyo700m_00",
14-
"/vlm/data/coyo400m_part1/coyo700m_01",
15-
"/vlm/data/coyo400m_part1/coyo700m_02",
16-
"/vlm/data/coyo400m_part1/coyo700m_03",
17-
"/vlm/data/coyo400m_part1/coyo700m_04",
18-
"/vlm/data/coyo400m_part1/coyo700m_05",
19-
"/vlm/data/coyo400m_part1/coyo700m_06",
20-
"/vlm/data/coyo400m_part1/coyo700m_07",
21-
"/vlm/data/coyo400m_part1/coyo700m_08",
22-
"/vlm/data/coyo400m_part1/coyo700m_09",
23-
"/vlm/data/coyo400m_part2/coyo700m_10",
24-
"/vlm/data/coyo400m_part2/coyo700m_11",
25-
"/vlm/data/coyo400m_part2/coyo700m_12",
26-
"/vlm/data/coyo400m_part2/coyo700m_13",
27-
"/vlm/data/coyo400m_part2/coyo700m_14",
28-
"/vlm/data/coyo400m_part2/coyo700m_15",
29-
"/vlm/data/coyo400m_part2/coyo700m_16",
30-
"/vlm/data/coyo400m_part2/coyo700m_17",
31-
"/vlm/data/coyo400m_part2/coyo700m_18",
32-
"/vlm/data/coyo400m_part2/coyo700m_19",
33-
"/vlm/data/coyo400m_part3/coyo700m_20",
34-
"/vlm/data/coyo400m_part3/coyo700m_21",
35-
"/vlm/data/coyo400m_part3/coyo700m_22",
36-
"/vlm/data/coyo400m_part3/coyo700m_24",
37-
"/vlm/data/coyo400m_part3/coyo700m_25",
38-
"/vlm/data/coyo400m_part3/coyo700m_26",
39-
"/vlm/data/coyo400m_part3/coyo700m_27",
40-
"/vlm/data/coyo400m_part3/coyo700m_28",
41-
"/vlm/data/coyo400m_part3/coyo700m_29",
42-
"/vlm/data/coyo400m_part4/coyo700m_30",
43-
"/vlm/data/coyo400m_part4/coyo700m_31",
44-
"/vlm/data/coyo400m_part4/coyo700m_31",
45-
]
46-
list_laion = [
47-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-20-tencentos",
48-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-21-tencentos",
49-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-23-tencentos",
50-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-28-tencentos",
51-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-34-tencentos",
52-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-58-tencentos",
53-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-62-tencentos",
54-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-85-tencentos",
55-
]
12+
# Data paths: set the DATA_ROOT environment variable, or replace the "/path/to/data" default below with your own path.
13+
DATA_ROOT = os.getenv("DATA_ROOT", "/path/to/data")
14+
list_coyo = [f"{DATA_ROOT}/coyo400m/coyo700m_{i:02d}" for i in range(32)]
15+
list_laion = [f"{DATA_ROOT}/laion/laion_part_{i:02d}" for i in range(8)]
5616

5717

5818
@DATASET_REGISTRY.register()
5919
def llava_vit_si_2025_12_12():
60-
list_coyo = [
61-
"/vlm/data/coyo400m_part1/coyo700m_00",
62-
"/vlm/data/coyo400m_part1/coyo700m_01",
63-
"/vlm/data/coyo400m_part1/coyo700m_02",
64-
"/vlm/data/coyo400m_part1/coyo700m_03",
65-
"/vlm/data/coyo400m_part1/coyo700m_04",
66-
"/vlm/data/coyo400m_part1/coyo700m_05",
67-
"/vlm/data/coyo400m_part1/coyo700m_06",
68-
"/vlm/data/coyo400m_part1/coyo700m_07",
69-
"/vlm/data/coyo400m_part1/coyo700m_08",
70-
"/vlm/data/coyo400m_part1/coyo700m_09",
71-
"/vlm/data/coyo400m_part2/coyo700m_10",
72-
"/vlm/data/coyo400m_part2/coyo700m_11",
73-
"/vlm/data/coyo400m_part2/coyo700m_12",
74-
"/vlm/data/coyo400m_part2/coyo700m_13",
75-
"/vlm/data/coyo400m_part2/coyo700m_14",
76-
"/vlm/data/coyo400m_part2/coyo700m_15",
77-
"/vlm/data/coyo400m_part2/coyo700m_16",
78-
"/vlm/data/coyo400m_part2/coyo700m_17",
79-
"/vlm/data/coyo400m_part2/coyo700m_18",
80-
"/vlm/data/coyo400m_part2/coyo700m_19",
81-
"/vlm/data/coyo400m_part3/coyo700m_20",
82-
"/vlm/data/coyo400m_part3/coyo700m_21",
83-
"/vlm/data/coyo400m_part3/coyo700m_22",
84-
"/vlm/data/coyo400m_part3/coyo700m_24",
85-
"/vlm/data/coyo400m_part3/coyo700m_25",
86-
"/vlm/data/coyo400m_part3/coyo700m_26",
87-
"/vlm/data/coyo400m_part3/coyo700m_27",
88-
"/vlm/data/coyo400m_part3/coyo700m_28",
89-
"/vlm/data/coyo400m_part3/coyo700m_29",
90-
"/vlm/data/coyo400m_part4/coyo700m_30",
91-
"/vlm/data/coyo400m_part4/coyo700m_31",
92-
"/vlm/data/coyo400m_part4/coyo700m_31",
93-
]
94-
list_laion = [
95-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-20-tencentos",
96-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-21-tencentos",
97-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-23-tencentos",
98-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-28-tencentos",
99-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-34-tencentos",
100-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-58-tencentos",
101-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-62-tencentos",
102-
"/vlm/data/LAION224M_HOI31M_IN13M_labeled_2024_03_05/LAION224M_HOI31M_IN13M_labeled_2024_03_05_VM-2-85-tencentos",
103-
]
20+
# use module-level list_coyo and list_laion
10421
rank = int(os.getenv("RANK", "0"))
10522
world_size = int(os.getenv("WORLD_SIZE", "1")) # Expected to be 128
10623

shells/2025_12_08_new_l14_continue_128gpus_all_residual.sh

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,37 +18,24 @@ export NCCL_NSOCKS_PERTHREAD=1
1818
export NCCL_IB_GID_INDEX=3
1919
export NCCL_DEBUG=INFO
2020
export NCCL_IB_DISABLE=0
21-
export NCCL_IB_HCA=mlx5_2,mlx5_3,mlx5_4,mlx5_5,mlx5_6,mlx5_7,mlx5_8,mlx5_1
22-
export NCCL_NET_GDR_LEVEL=2
23-
export NCCL_IB_QPS_PER_CONNECTION=8
24-
export NCCL_IB_TC=160
25-
export NCCL_IB_TIMEOUT=22
21+
export NCCL_IB_HCA=${NCCL_IB_HCA:-"mlx5_0"}
22+
export NCCL_NET_GDR_LEVEL=${NCCL_NET_GDR_LEVEL:-2}
23+
export NCCL_IB_QPS_PER_CONNECTION=${NCCL_IB_QPS_PER_CONNECTION:-8}
24+
export NCCL_IB_TC=${NCCL_IB_TC:-160}
25+
export NCCL_IB_TIMEOUT=${NCCL_IB_TIMEOUT:-22}
2626
export USE_CHECKPOINT=0
2727
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
2828

29-
# 主机名列表
29+
# 主机名列表 - 请根据实际环境配置
3030
list_hostname=(
31-
instance-5fbzrg73
32-
instance-21s8vw5h
33-
instance-mtntjld4-01
34-
instance-mtntjld4-02
35-
instance-mtntjld4-03
36-
instance-mtntjld4-04
37-
instance-mtntjld4-05
38-
instance-mtntjld4-06
39-
instance-mtntjld4-07
40-
instance-mtntjld4-08
41-
instance-mtntjld4-09
42-
instance-mtntjld4-10
43-
instance-mtntjld4-11
44-
instance-mtntjld4-12
45-
instance-mtntjld4-13
46-
instance-mtntjld4-14
31+
# 在此添加你的主机名
32+
# example-node-01
33+
# example-node-02
4734
)
4835

4936
# 主节点地址和端口
50-
master_addr="172.16.5.19"
51-
master_port=$((18889 + 305))
37+
master_addr="${MASTER_ADDR:-127.0.0.1}"
38+
master_port="${MASTER_PORT:-29500}"
5239

5340
# 计算节点总数
5441
nnode=${#list_hostname[@]}

0 commit comments

Comments
 (0)