Commit 00f7e16

upgrade deps to pytorch 2.9.1 and transformers 5.2.0 (#1596)
* chore(build): update deps transformers -> 5.2.0
* chore(build): update cudnn to 9.15.1.9 of torch 2.9.1 in dockerfile
* chore(ci): use wider tolerance in test_qwen3_5 sp case
* refactor: add RopeParametersConfig due to transformers 5.2.0 bc
* chore(build): conditional path for lmdeploy and sglang in Dockerfile
* chore(build): update dockerfile for deepep, deep_gemm and ci proxy speed fix
* fix(ci): ep>1 clip_grad_norm fails due to pt2.9 check
* fix(ci): clean hf dynamic modules before test setup
* chore(docker): update lmdeploy deps
* refactor: Move compile config from FSDPConfig to model_cfg
* fix(engine): use field rope_parameters when save_hf
1 parent c0d0476 commit 00f7e16

62 files changed

Lines changed: 937 additions & 342 deletions

Note: this is a large commit, so some content is hidden by default; only a subset of the 62 changed files is shown below.

.dev_scripts/xtuner_rl_path.pth

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+import xtuner_rl_path
.dev_scripts/xtuner_rl_path.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+import os
+import sys
+
+dist_packages_index = 0
+for i, path in enumerate(sys.path):
+    if path.endswith("dist-packages"):
+        dist_packages_index = i
+        break
+
+if os.getenv('XTUNER_USE_LMDEPLOY', '').lower() in ['1', 'on', 'true']:
+    lmdeploy_envs_dir = os.getenv('XTUNER_LMDEPLOY_ENVS_DIR', '/envs/lmdeploy')
+    if lmdeploy_envs_dir not in sys.path:
+        sys.path.insert(dist_packages_index, lmdeploy_envs_dir)
+
+elif os.getenv('XTUNER_USE_SGLANG', '').lower() in ['1', 'on', 'true']:
+    sglang_envs_dir = os.getenv('XTUNER_SGLANG_ENVS_DIR', '/envs/sglang')
+    if sglang_envs_dir not in sys.path:
+        sys.path.insert(dist_packages_index, sglang_envs_dir)
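
Together with the Dockerfile change below ("Install custom .pth file for conditional lmdeploy and sglang path injection"), these two files implement the engine switch from the commit message: CPython's site module executes every .pth file found in site-packages at interpreter startup, and a .pth line that begins with "import" is run as code, so xtuner_rl_path can prepend the engine-specific --target directory to sys.path before anything else is imported. A minimal sketch of how the switch could be checked from inside an image that has the .pth installed; the environment variables and the /envs/lmdeploy default come from the diff above, while the subprocess probe itself is purely illustrative:

import os
import subprocess
import sys

# Launch a child interpreter with the lmdeploy switch enabled and inspect its sys.path.
env = dict(os.environ, XTUNER_USE_LMDEPLOY="1")
child = subprocess.run(
    [sys.executable, "-c", "import sys; print(sys.path)"],
    env=env,
    capture_output=True,
    text=True,
    check=True,
)

# "/envs/lmdeploy" is the default XTUNER_LMDEPLOY_ENVS_DIR used by the new module;
# it only shows up if the .pth file from this commit is installed in site-packages.
print("/envs/lmdeploy" in child.stdout)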

.github/workflows/unit_test.yaml

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ on:
 env:
   WORKSPACE_PREFIX: $(echo $GITHUB_WORKSPACE |cut -d '/' -f 1-5)
   WORKSPACE_PREFIX_SHORT: $(echo $GITHUB_WORKSPACE |cut -d '/' -f 1-3)
-  IMAGE: ailab-llmrazor/xtuner:pt28_20251216_d769950
+  IMAGE: ailab-llmrazor/xtuner_tmp:pt29_20260414_c8f6fa1
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

Dockerfile

Lines changed: 117 additions & 53 deletions
@@ -2,39 +2,38 @@
 # builder
 ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.03-py3
 
-## build args
+## build base env
 FROM ${BASE_IMAGE} AS setup_env
 
-ARG TORCH_VERSION
 ARG PPA_SOURCE
-
-RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
-    sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+RUN sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
     apt update && \
     apt install --no-install-recommends ca-certificates -y && \
    apt install --no-install-recommends bc wget -y && \
    apt install --no-install-recommends build-essential sudo -y && \
    apt install --no-install-recommends git curl pkg-config tree unzip tmux \
    openssh-server openssh-client dnsutils iproute2 lsof net-tools zsh rclone \
-    iputils-ping telnet netcat-openbsd -y && \
+    iputils-ping telnet netcat-openbsd htop bubblewrap socat -y && \
    apt clean && rm -rf /var/lib/apt/lists/*
 
 RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
-RUN pip install pystack py-spy --no-cache-dir
+RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
 RUN git config --system --add safe.directory "*"
 
+# torch
+ARG TORCH_VERSION
+ARG PYTORCH_WHEELS_URL
 RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+    --mount=type=secret,id=NO_PROXY,env=no_proxy \
    if [ -n "${TORCH_VERSION}" ]; then \
        pip install torchvision torch==${TORCH_VERSION} \
-        --index-url https://download.pytorch.org/whl/cu128 \
-        --extra-index-url https://download.pytorch.org/whl/cu126 \
+        -i ${PYTORCH_WHEELS_URL}/cu128 \
+        --extra-index-url ${PYTORCH_WHEELS_URL}/cu126 \
        --no-cache-dir; \
    fi
-
 # set reasonable default for CUDA architectures when building ngc image
-ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
-
-RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
+ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"
 
 ARG FLASH_ATTN_DIR=/tmp/flash-attn
 ARG CODESPACE=/root/codespace
@@ -56,6 +55,9 @@ ARG CODESPACE
 ARG FLASH_ATTN_DIR
 ARG FLASH_ATTN3_DIR
 ARG FLASH_ATTN_URL
+# force hopper for now, you change it throught build args
+ARG FLASH_ATTN_CUDA_ARCHS="90"
+ARG FLASH_ATTENTION_DISABLE_SM80="TRUE"
 
 RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
    git clone $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
@@ -119,42 +121,41 @@ WORKDIR ${CODESPACE}/causal-conv1d
 
 RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip wheel -w ${CAUSAL_CONV1D_DIR} -v --no-deps --no-build-isolation .
 
-# pypi install nvshmem and compile deepep
+# compile nvshmem and deepep
 FROM setup_env AS deep_ep
 
 ARG CODESPACE
 ARG DEEP_EP_DIR
 ARG DEEP_EP_URL
-# build sm90 and sm100 for deep_ep for now
-ARG TORCH_CUDA_ARCH_LIST="9.0 10.0"
 
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+#     curl -LO https://github.com/NVIDIA/nvshmem/releases/download/v3.4.5-0/nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
+#     tar -zxvf nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
+#     cd ${CODESPACE}/nvshmem_src && \
+#     NVSHMEM_SHMEM_SUPPORT=0 \
+#     NVSHMEM_UCX_SUPPORT=0 \
+#     NVSHMEM_USE_NCCL=0 \
+#     NVSHMEM_MPI_SUPPORT=0 \
+#     NVSHMEM_IBGDA_SUPPORT=1 \
+#     NVSHMEM_USE_GDRCOPY=0 \
+#     NVSHMEM_PMIX_SUPPORT=0 \
+#     NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+#     NVSHMEM_BUILD_TESTS=0 \
+#     NVSHMEM_BUILD_EXAMPLES=0 \
+#     NVSHMEM_BUILD_HYDRA_LAUNCHER=0 \
+#     NVSHMEM_BUILD_TXZ_PACKAGE=0 \
+#     NVSHMEM_BUILD_PYTHON_LIB=OFF \
+#     cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_PREFIX} -DMLX5_lib=/lib/x86_64-linux-gnu/libmlx5.so.1 && \
+#     cmake --build build --target install --parallel 32 && \
 RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
-    curl -LO https://github.com/NVIDIA/nvshmem/releases/download/v3.4.5-0/nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
-    tar -zxvf nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
-    cd ${CODESPACE}/nvshmem_src && \
-    NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_USE_GDRCOPY=0 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_BUILD_TESTS=0 \
-    NVSHMEM_BUILD_EXAMPLES=0 \
-    NVSHMEM_BUILD_HYDRA_LAUNCHER=0 \
-    NVSHMEM_BUILD_TXZ_PACKAGE=0 \
-    NVSHMEM_BUILD_PYTHON_LIB=OFF \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_PREFIX} -DMLX5_lib=/lib/x86_64-linux-gnu/libmlx5.so.1 && \
-    cmake --build build --target install --parallel 32 && \
    cd ${CODESPACE} && git clone $(echo ${DEEP_EP_URL} | cut -d '@' -f 1) && \
    cd ${CODESPACE}/DeepEP && \
    git checkout $(echo ${DEEP_EP_URL} | cut -d '@' -f 2) && \
    git submodule update --init --recursive --force
 
 WORKDIR ${CODESPACE}/DeepEP
 
-RUN NVSHMEM_DIR=${NVSHMEM_PREFIX} pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
+RUN pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
 
 # compile deep_gemm
 FROM setup_env AS deep_gemm
@@ -192,7 +193,7 @@ COPY --from=flash_attn ${FLASH_ATTN_DIR} ${FLASH_ATTN_DIR}
 COPY --from=adaptive_gemm ${ADAPTIVE_GEMM_DIR} ${ADAPTIVE_GEMM_DIR}
 COPY --from=grouped_gemm ${GROUPED_GEMM_DIR} ${GROUPED_GEMM_DIR}
 COPY --from=deep_ep ${DEEP_EP_DIR} ${DEEP_EP_DIR}
-COPY --from=deep_ep ${NVSHMEM_PREFIX} ${NVSHMEM_PREFIX}
+# COPY --from=deep_ep ${NVSHMEM_PREFIX} ${NVSHMEM_PREFIX}
 COPY --from=deep_gemm ${DEEP_GEMM_DIR} ${DEEP_GEMM_DIR}
 COPY --from=causal_conv1d ${CAUSAL_CONV1D_DIR} ${CAUSAL_CONV1D_DIR}
 
@@ -204,51 +205,114 @@ RUN unzip ${DEEP_EP_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
 RUN unzip ${DEEP_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
 RUN unzip ${CAUSAL_CONV1D_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
 
-# install sglang and its runtime requirements
-ARG SGLANG_VERSION
+ARG DEFAULT_PYPI_URL
 
-RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
-    pip install sglang==${SGLANG_VERSION} sgl-kernel==0.3.14.post1 pybase64 orjson uvloop setproctitle msgspec \
-    compressed_tensors python-multipart torch_memory_saver \
-    grpcio-tools==1.75.1 hf_transfer interegular llguidance==0.7.11 \
-    xgrammar==0.1.24 blobfile==3.0.0 flashinfer_python==0.4.0 --no-cache-dir --no-deps
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+RUN pip install pystack py-spy --no-cache-dir -i ${DEFAULT_PYPI_URL}
+
+# install sglang and its runtime requirements
+ENV XTUNER_SGLANG_ENVS_DIR=/envs/sglang
+
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+RUN \
+    pip install --target ${XTUNER_SGLANG_ENVS_DIR} \
+    sglang==0.5.9 sgl-kernel==0.3.21 \
+    apache-tvm-ffi==0.1.9 \
+    anthropic==0.86.0 \
+    build==1.4.0 \
+    cuda-python==12.9.0 \
+    decord2==3.2.0 \
+    flashinfer_python==0.6.3 \
+    flashinfer_cubin==0.6.3 \
+    gguf==0.18.0 \
+    modelscope==1.35.3 \
+    nvidia-cutlass-dsl==4.4.2 \
+    openai-harmony==0.0.4 \
+    openai==2.6.1 \
+    outlines==0.1.11 \
+    quack-kernels==0.2.4 \
+    timm==1.0.16 \
+    torchao==0.9.0 \
+    torchaudio==2.9.1 \
+    torchcodec==0.8.0 \
+    xgrammar==0.1.32 \
+    smg-grpc-proto==0.4.5 \
+    grpcio==1.78.1 \
+    grpcio-reflection==1.78.1 \
+    grpcio-health-checking==1.80.0 \
+    pycryptodomex==3.23.0 \
+    lxml==6.0.2 \
+    cuda-bindings==12.9.6 \
+    cuda-pathfinder==1.5.0 \
+    nvidia-cudnn-frontend==1.21.0 \
+    lark==1.3.1 \
+    pycountry==26.2.16 \
+    airportsdata==20260315 \
+    outlines_core==0.1.26 \
+    torch-c-dlpack-ext==0.1.5 \
+    pyproject_hooks==1.2.0 \
+    huggingface_hub==0.36.2 \
+    torch_memory_saver==0.0.9 \
+    diskcache==5.6.3 distro==1.9.0 jiter==0.13.0 \
+    llguidance==0.7.11 blobfile==3.0.0 \
+    pybase64 orjson uvloop setproctitle msgspec partial_json_parser \
+    compressed_tensors python-multipart \
+    hf_transfer interegular --no-cache-dir --no-deps -i ${DEFAULT_PYPI_URL}
 
 # install lmdeploy and its missing runtime requirements
 ARG LMDEPLOY_VERSION
 ARG LMDEPLOY_URL
+ENV XTUNER_LMDEPLOY_ENVS_DIR=/envs/lmdeploy
 
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+ARG LMDEPLOY_WHEELS=https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu128-cp312-cp312-manylinux2014_x86_64.whl
 RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+    --mount=type=secret,id=NO_PROXY,env=no_proxy \
    pip install fastapi fire openai outlines \
-    partial_json_parser ray[default] shortuuid uvicorn \
-    'pydantic>2' openai_harmony dlblas --no-cache-dir && \
+    pyzmq aiohttp cloudpickle prometheus_client protobuf numpy pillow einops tiktoken sentencepiece \
+    partial_json_parser 'ray[default]<3' shortuuid uvicorn pybase64 \
+    'pydantic>2' openai_harmony dlblas --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-cache-dir -i ${DEFAULT_PYPI_URL} && \
+    pip install xgrammar==0.1.32 timm!=1.0.23 --no-cache-dir -i ${DEFAULT_PYPI_URL} --no-deps && \
    if [ -n "${LMDEPLOY_VERSION}" ]; then \
-        pip install lmdeploy==${LMDEPLOY_VERSION} --no-deps --no-cache-dir; \
+        # pip install lmdeploy==${LMDEPLOY_VERSION} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
+        echo pip install ${LMDEPLOY_WHEELS} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
+        pip install ${LMDEPLOY_WHEELS} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
    else \
        git clone $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
        cd ${CODESPACE}/lmdeploy && \
        git checkout $(echo ${LMDEPLOY_URL} | cut -d '@' -f 2) && \
-        pip install . -v --no-deps --no-cache-dir; \
+        pip install . -v --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
    fi
 
 ## install xtuner
 ARG XTUNER_URL
 ARG XTUNER_COMMIT
-#RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
 #     git clone $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
 #     cd ${CODESPACE}/xtuner && \
 #     git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
 COPY . ${CODESPACE}/xtuner
 
 WORKDIR ${CODESPACE}/xtuner
-RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
-    pip install .[all] -v --no-cache-dir
+
+# Install custom .pth file for conditional lmdeploy and sglang path injection
+RUN cp -r .dev_scripts/xtuner_rl_path* ${PYTHON_SITE_PACKAGE_PATH}/
+
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+RUN pip install .[all] -v --no-cache-dir -i ${DEFAULT_PYPI_URL}
 
 WORKDIR ${CODESPACE}
 
 # nccl update for torch 2.6.0
-RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
-    if [ "x${TORCH_VERSION}" = "x2.6.0" ]; then \
-        pip install nvidia-nccl-cu12==2.25.1 --no-cache-dir; \
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
+RUN if [ "x${TORCH_VERSION}" = "x2.6.0" ]; then \
+        pip install nvidia-nccl-cu12==2.25.1 --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
+    fi
+
+# cudnn update for torch 2.9.1
+# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
RUN if [ "x${TORCH_VERSION}" = "x2.9.1" ]; then \
+        pip install nvidia-cudnn-cu12==9.15.1.9 --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
    fi
 
 # setup sysctl

autotest/config.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ default_config:
   gpus_per_task: 8
   cpus_per_task: 120
   memory_per_task: 512
-  image: ailab-llmrazor/xtuner:pt28_latest
+  image: ailab-llmrazor/xtuner_tmp:pt29_20260414_c8f6fa1
   envs:
     - HF_HUB_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/models/hf_hub
 eval:

autotest/config/gptoss.py

Lines changed: 3 additions & 3 deletions
@@ -18,27 +18,27 @@
 
 
 gptoss_cfg = GptOss21BA3P6Config(
+    compile_cfg=False,
     rope_scaling_cfg=RopeScalingConfig(
         type="yarn",
         beta_fast=16.0,
         beta_slow=1.05,
         factor=16.0,
         original_max_position_embeddings=4096,
         truncate=True,
-    )
+    ),
 )
 optim_cfg = AdamWConfig(lr=6e-05)
 lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
 fsdp_cfg = FSDPConfig(
-    torch_compile=False,
     cpu_offload=False,
     ep_size=gptoss_cfg.ep_size,
 )
 
 dataset_config = [
     {
         "dataset": DatasetConfig(name="alpaca", anno_path=ALPACA_PATH, sample_ratio=1.0),
-        "tokenize_fn": OpenaiTokenizeFunctionConfig(chat_template='gpt-oss', max_length=16384),
+        "tokenize_fn": OpenaiTokenizeFunctionConfig(chat_template="gpt-oss", max_length=16384),
     },
 ]
 
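
This config and the three npu_qwen3 configs below all apply the refactor from the commit message ("Move compile config from FSDPConfig to model_cfg"): the compile switch is no longer passed as torch_compile on FSDPConfig but as compile_cfg on the model config. A minimal sketch of the new layout, using hypothetical dataclass stand-ins that only mirror the keyword arguments visible in these diffs (the real xtuner config classes carry many more fields, and compile_cfg may accept a richer compile config object than the plain False used in CI):

from dataclasses import dataclass


# Hypothetical stand-ins for illustration only; not the real xtuner classes.
@dataclass
class Qwen3MoE30BA3Config:
    ep_size: int = 1
    compile_cfg: bool = False  # compile switch now lives on the model config


@dataclass
class FSDPConfig:
    cpu_offload: bool = False
    ep_size: int = 1
    # torch_compile was removed from here by this commit


# After the refactor: compile behaviour is configured on the model config,
# and FSDPConfig only keeps sharding/offload concerns.
moe_cfg = Qwen3MoE30BA3Config(ep_size=8, compile_cfg=False)
fsdp_cfg = FSDPConfig(cpu_offload=False, ep_size=moe_cfg.ep_size)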

autotest/config/npu_qwen3.py

Lines changed: 2 additions & 3 deletions
@@ -16,11 +16,10 @@
 ALPACA_PATH = os.environ["ALPACA_PATH"]
 
 
-moe_cfg = Qwen3MoE30BA3Config()
+moe_cfg = Qwen3MoE30BA3Config(compile_cfg=False)
 optim_cfg = AdamWConfig(lr=6e-05)
 lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
 fsdp_cfg = FSDPConfig(
-    torch_compile=False,
     cpu_offload=False,
     ep_size=moe_cfg.ep_size,
 )
@@ -34,7 +33,7 @@
 
 dataloader_config = DataloaderConfig(pack_max_length=16384)
 
-loss_cfg = CELossConfig(mode="chunk", chunk_size=1024) # CELossConfig()
+loss_cfg = CELossConfig(mode="chunk", chunk_size=1024)  # CELossConfig()
 
 
 trainer = TrainerConfig(

autotest/config/npu_qwen3_16nums.py

Lines changed: 2 additions & 3 deletions
@@ -16,11 +16,10 @@
 ALPACA_PATH = os.environ["ALPACA_PATH"]
 
 
-moe_cfg = Qwen3MoE30BA3Config()
+moe_cfg = Qwen3MoE30BA3Config(compile_cfg=False)
 optim_cfg = AdamWConfig(lr=6e-05)
 lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
 fsdp_cfg = FSDPConfig(
-    torch_compile=False,
     cpu_offload=False,
     ep_size=moe_cfg.ep_size,
 )
@@ -34,7 +33,7 @@
 
 dataloader_config = DataloaderConfig(pack_max_length=16384)
 
-loss_cfg = CELossConfig(mode="chunk", chunk_size=1024) # CELossConfig()
+loss_cfg = CELossConfig(mode="chunk", chunk_size=1024)  # CELossConfig()
 
 
 trainer = TrainerConfig(

autotest/config/npu_qwen3_moe_30BA3_ep8.py

Lines changed: 1 addition & 2 deletions
@@ -16,11 +16,10 @@
 ALPACA_PATH = os.environ["ALPACA_PATH"]
 
 
-moe_cfg = Qwen3MoE30BA3Config(ep_size=8)
+moe_cfg = Qwen3MoE30BA3Config(ep_size=8, compile_cfg=False)
 optim_cfg = AdamWConfig(lr=6e-05)
 lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
 fsdp_cfg = FSDPConfig(
-    torch_compile=True,
     cpu_offload=False,
     ep_size=moe_cfg.ep_size,
 )
