OneOS/Trainer/Dockerfile.train at master · wasertech/OneOS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# Base image, pinned to an explicit tag. Going by the tag name it bundles
# PyTorch 1.13.0, Python 3.9.12 and CUDA 11.7.1 on Ubuntu 20.04 (not verified here).
FROM cnstark/pytorch:1.13.0-py3.9.12-cuda11.7.1-ubuntu20.04

# Model parameters
# Each setting is a build argument (override with --build-arg <name>=<value>)
# whose value is re-exported as an environment variable in the image.
ARG dataset_name="wasertech/OneOS"
ENV DATASET_NAME=${dataset_name}

ARG base_model_name="TinyPixel/Llama-2-7B-bf16-sharded"
ENV BASE_MODEL_NAME=${base_model_name}

ARG output_dir="output"
ENV OUTPUT_MODEL_PATH=${output_dir}

ARG model_name="assistant-llama2-7b-bf16"
ENV OUTPUT_MODEL_NAME=${model_name}

ARG disable_no_ignore_characters_warning=0
ENV DONT_WARN_IGNORE_CHARS=${disable_no_ignore_characters_warning}

# Training hyper-parameters
# Same ARG -> ENV pattern as above: the lowercase build argument supplies the
# value, the uppercase environment variable carries it into the container.
# NOTE(review): the spellings "opm_nproc", "SEQENCE_LENGTH" and
# "gradien_checkpointing" are kept as-is because they are the names callers
# (build commands, presumably run.sh) already use - verify before renaming.

ARG distribute_training=0
ENV DISTRIBUTE_TRAIN=${distribute_training}

ARG train_batch_size=16
ENV TRAIN_BATCH_SIZE=${train_batch_size}

ARG eval_batch_size=8
ENV EVAL_BATCH_SIZE=${eval_batch_size}

ARG opm_nproc=1
ENV OMP_NUM_THREADS=${opm_nproc}

ARG epochs=1
ENV EPOCHS=${epochs}

ARG learning_rate="1.41e-5"
ENV LEARNING_RATE=${learning_rate}

ARG seq_len=8192
ENV SEQENCE_LENGTH=${seq_len}

ARG dropout=0.0
ENV DROPOUT=${dropout}

ARG amp=1
ENV AMP=${amp}

ARG freeze_encoder=1
ENV FREEZE_ENCODER=${freeze_encoder}

# Gradient Accumulation Steps
ARG gas=2
ENV GAS=${gas}

ARG gradien_checkpointing=1
ENV GRAD_CHECK=${gradien_checkpointing}

ARG use_peft=0
ENV USE_PEFT=${use_peft}

ARG use_4bit=0
ENV USE_4BIT=${use_4bit}

ARG use_8bit=0
ENV USE_8BIT=${use_8bit}
# Declared as a build argument like every other setting in this file, so that
# --build-arg push_to_hub=1 is actually consumed and no stray lowercase
# "push_to_hub" variable ends up in the runtime environment.
ARG push_to_hub=0
ENV PUSH_TO_HUB=$push_to_hub

# Warm-up: the ratio is used in preference to the step count.
# Set WARMUP_RATIO=0 to fall back to WARMUP_STEPS.
ARG warm_up_steps=500
ENV WARMUP_STEPS=${warm_up_steps}

ARG warm_up_ratio=0.1
ENV WARMUP_RATIO=${warm_up_ratio}

# Checkpoint saving and evaluation settings
ARG save_steps=400
ENV SAVE_STEPS=${save_steps}

ARG eval_steps=100
ENV EVAL_STEPS=${eval_steps}

ARG eval_strategy="steps"
ENV EVAL_STRAT=${eval_strategy}

ARG max_amount_checkpoints=3
ENV MAX_CHECKPOINTS=${max_amount_checkpoints}

# Number of data processing workers.
# Too many workers can run the machine out of memory; use at most one worker
# per physical CPU core. Example: 12 cores (24 threads) with 100 Gb of RAM
# -> 12 workers. More may work, but one worker per hardware thread is likely
# to trigger OOM.
ARG nproc=12
ENV NPROC=${nproc}

# Distributed training only (more than one GPU); requires DISTRIBUTE_TRAIN=1.
# Example: NPROC_PER_GPU=2 to use 2 GPUs.
ARG nproc_per_gpu=0
ENV NPROC_PER_GPU=${nproc_per_gpu}

# Hub API token (presumably for the HuggingFace hub), exported as HUB_API_TOKEN.
# NOTE(review): a token passed with --build-arg hub_token=... is persisted in
# the image through this ENV and is readable via `docker history` /
# `docker inspect`. Prefer leaving it empty at build time and supplying
# HUB_API_TOKEN when the container is started (e.g. `docker run -e ...`).
ARG hub_token=""
ENV HUB_API_TOKEN=$hub_token

# Numeric user and group ids, consumed later in this file when the "trainer"
# account is created.
ARG uid=999
ENV UID=${uid}

ARG gid=999
ENV GID=${gid}

# Make sure we can extract filenames with UTF-8 chars
ENV LANG=C.UTF-8

# Avoid keyboard-configuration step
ENV DEBIAN_FRONTEND=noninteractive

# Home of the trainer user and the directory the container starts in.
ENV HOMEDIR=/home/trainer
ENV WORKDIR=/mnt

# Python virtual environment location. VIRTUAL_ENV is built from HOMEDIR and
# VIRTUAL_ENV_NAME, so it has to be a separate ENV instruction placed after them.
ENV VIRTUAL_ENV_NAME=llm-train
ENV VIRTUAL_ENV=$HOMEDIR/$VIRTUAL_ENV_NAME

# Put the virtualenv's executables first on PATH.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Debug aid: print the build environment to the build log. HUB_API_TOKEN is
# unset first (env -u) so a token supplied at build time is not written to logs.
RUN env -u HUB_API_TOKEN env

# Get basic packages (git-lfs: for uploading models to HuggingFace hub).
# Index update, install and index cleanup share one layer so the install never
# runs against a stale package index and the index is not shipped in the image.
# Currently disabled packages: ffmpeg libboost-all-dev zlib1g-dev libbz2-dev
# liblzma-dev unzip pixz sox libsox-fmt-all xz-utils
RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        git \
        git-lfs \
        locales \
        locales-all \
        pkg-config \
        python3 \
        python3-pip \
        sudo \
        virtualenv \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create the "trainer" group with the configured GID, then the "trainer"
# system user with the configured UID.
RUN groupadd --gid "$GID" trainer \
    && adduser --system --uid "$UID" --group trainer

# Allow trainer to run any command as root through sudo without a password;
# the sudoers drop-in is made read-only for owner and group (0440).
RUN printf '%s\n' 'trainer ALL=(root) NOPASSWD:ALL' > /etc/sudoers.d/trainer \
    && chmod 0440 /etc/sudoers.d/trainer

# Below that point, nothing requires being root
USER trainer

WORKDIR $HOMEDIR

# Create the virtualenv at the absolute path exported as VIRTUAL_ENV
# ($HOMEDIR/$VIRTUAL_ENV_NAME) rather than a path relative to the working
# directory. Its bin/ directory is already first on PATH (set next to the
# VIRTUAL_ENV definition), so PATH is not prepended a second time here.
RUN virtualenv --python=/usr/bin/python3 "$VIRTUAL_ENV"

# Python dependencies, installed into the virtualenv (its bin/ is first on PATH).
# --no-cache-dir keeps pip's download/wheel cache out of the image layers.
# The installs stay as separate, ordered steps so each one is resolved against
# what the previous steps installed, exactly as before.
RUN pip install --no-cache-dir 'git+https://github.com/huggingface/trl.git'

RUN pip install --no-cache-dir 'git+https://github.com/huggingface/transformers.git'

RUN pip install --no-cache-dir accelerate

RUN pip install --no-cache-dir 'git+https://github.com/huggingface/peft.git'

RUN pip install --no-cache-dir 'git+https://github.com/huggingface/datasets.git'

RUN pip install --no-cache-dir bitsandbytes

RUN pip install --no-cache-dir einops

RUN pip install --no-cache-dir wandb

RUN pip install --no-cache-dir scipy

# Install AutoAWQ

RUN pip install --no-cache-dir autoawq

RUN pip install --no-cache-dir 'git+https://github.com/huggingface/tokenizers.git#egg=tokenizers'

# Install UnSloth
# RUN pip install "unsloth[cu121_torch211] @ git+https://github.com/unslothai/unsloth.git"

# Containers start in $WORKDIR (/mnt).
WORKDIR $WORKDIR

# Copy now so that docker build can leverage caches
COPY --chown=trainer:trainer . $HOMEDIR/

# Exec-form entrypoint. ENTRYPOINT does not expand $HOMEDIR itself, so a shell
# is still used for the expansion, but it `exec`s run.sh so the script replaces
# the shell as PID 1 and receives the signals sent by `docker stop`.
ENTRYPOINT ["/bin/sh", "-c", "exec \"$HOMEDIR/run.sh\""]