File tree Expand file tree Collapse file tree
examples/moonshotai/Kimi-K2.5 Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1818native_mpi_rank=$OMPI_COMM_WORLD_RANK
1919native_mpi_local_rank=$OMPI_COMM_WORLD_LOCAL_RANK
2020# Works with Slurm launching with `--mpi=pmix`
# The added (+) assignments below resolve each rank by falling back in order:
# the PMIx variable (PMIX_RANK / PMIX_LOCAL_RANK), then the Open MPI value
# captured earlier in native_mpi_rank / native_mpi_local_rank, then the Slurm
# variable (SLURM_PROCID / SLURM_LOCALID), and finally 0 when all are unset
# or empty. The removed (-) assignments had no Slurm or 0 fallback.
21- mpi_rank=${PMIX_RANK:- $native_mpi_rank }
22- mpi_local_rank=${PMIX_LOCAL_RANK:- $native_mpi_local_rank }
21+ mpi_rank=${PMIX_RANK:- ${ native_mpi_rank:- ${SLURM_PROCID :- 0} } }
22+ mpi_local_rank=${PMIX_LOCAL_RANK:- ${ native_mpi_local_rank:- ${SLURM_LOCALID :- 0} } }
2323
2424FAIL=0
2525FAIL_EXIT=0
@@ -48,8 +48,23 @@ function report_result {
4848}
4949
# Install the extra Python dependencies (diskcache and nvidia-resiliency-ext)
# at most once, coordinating ranks through a marker file.
#
# Behavior:
#   - If the marker file already exists, return immediately.
#   - Local rank 0 (per $mpi_local_rank) performs the installation and creates
#     the marker ONLY when every install step succeeded.
#   - All other local ranks poll for the marker, for up to 600 seconds.
#
# Globals read:
#   mpi_local_rank - local rank of this process; rank 0 does the install.
#
# Returns:
#   0 when the dependencies are installed (or were already marked installed);
#   non-zero when the installation failed on rank 0, or when a waiting rank
#   timed out without ever seeing the marker.
#
# NOTE(review): the marker lives in /tmp, so this presumably assumes /tmp is
# shared by all ranks on a node and is not reused across different
# environments/containers — confirm.
function util_install_extra_dep {
    local _marker=/tmp/.nmm_extra_dep_installed
    if [[ -f "$_marker" ]]; then
        return 0
    fi

    if [[ "$mpi_local_rank" -eq 0 ]]; then
        local _tmp_root _nvrx_dir _rc=0
        _tmp_root="$(mktemp -d)" || {
            echo "util_install_extra_dep: mktemp -d failed" >&2
            return 1
        }
        _nvrx_dir="${_tmp_root}/nvidia-resiliency-ext"

        # Chain every step so a failure anywhere is recorded in _rc instead of
        # being masked by a later command.
        pip install diskcache \
            && git clone --depth 1 https://github.com/NVIDIA/nvidia-resiliency-ext "${_nvrx_dir}" \
            && pip install "${_nvrx_dir}" \
            || _rc=$?

        # The clone is only needed while pip builds/installs from it.
        rm -rf "${_tmp_root}"

        if [[ $_rc -ne 0 ]]; then
            echo "util_install_extra_dep: dependency installation failed (exit ${_rc})" >&2
            return "$_rc"
        fi

        # Publish success last so waiting ranks never see a half-finished install.
        touch "$_marker"
    else
        local _waited=0
        while [[ ! -f "$_marker" && $_waited -lt 600 ]]; do
            sleep 1
            _waited=$(( _waited + 1 ))
        done
        # Distinguish "marker appeared" from "gave up waiting".
        if [[ ! -f "$_marker" ]]; then
            echo "util_install_extra_dep: timed out after ${_waited}s waiting for ${_marker}" >&2
            return 1
        fi
    fi
}
5570
Original file line number Diff line number Diff line change 1+ # DFlash offline synthetic data generation pipeline for Kimi-K2.5.
2+ #
3+ # 1-step pipeline (task_0 only):
4+ # task_0: Data synthesis — query vLLM server to generate prompt samples
5+ #
6+ # Usage:
7+ # uv run launch.py --yaml examples/moonshotai/Kimi-K2.5/hf_offline_dflash.yaml --yes
8+ # uv run slurm.py --yaml modules/Model-Optimizer/tools/launcher/examples/moonshotai/Kimi-K2.5/hf_offline_dflash.yaml --yes
9+
10+ job_name : Kimi-K2.5_DFlash_offline
11+ pipeline :
12+ allow_to_fail : false
13+ skip : false
14+ note :
15+
16+ global_vars :
# NOTE(review): this path names Kimi-K2.6, while job_name, the header comment
# and the example directory all say Kimi-K2.5 — confirm which model is intended.
17+ hf_model : /hf-local/moonshotai/Kimi-K2.6
18+
19+ # Step 1: Data synthesis via vLLM server
20+ # Args before "--" go to vllm-serve; args after "--" go to tools/query.py.
21+ task_0 :
22+ script : common/vllm/query.sh
23+ args :
# <<global_vars.hf_model>> refers to the hf_model entry under global_vars;
# presumably the launcher substitutes it before running the script — confirm.
24+ - --model <<global_vars.hf_model>>
# Tensor-parallel size of 8 matches gpus_per_node: 8 in slurm_config below.
25+ - --tensor-parallel-size 8
26+ - --port 8000
27+ - --host 0.0.0.0
28+ - --trust-remote-code
29+ - --enforce-eager
30+ - --gpu-memory-utilization 0.95
31+ - --max-model-len 4096
# Separator: arguments above go to vllm-serve, arguments below to tools/query.py.
32+ - --
33+ - --data /nemo_run/code/modules/Model-Optimizer/examples/dataset/synthetic_conversations_1k.jsonl
34+ - --save /scratchspace/data
35+ environment :
36+ - HF_LOCAL : /hf-local
37+ - VLLM_STARTUP_TIMEOUT : " 1800"
38+ slurm_config :
39+ _factory_ : " slurm_factory"
40+ nodes : 1
41+ ntasks_per_node : 1
42+ gpus_per_node : 8
# NOTE(review): `latest` is a floating tag; consider pinning a specific image
# version so runs are reproducible.
43+ container : vllm/vllm-openai:latest
You can’t perform that action at this time.
0 commit comments