File tree Expand file tree Collapse file tree
# Rank information as exported by an Open MPI launcher. The `:-` default keeps
# these reads safe when the variables are unset and the script runs with `set -u`.
native_mpi_rank=${OMPI_COMM_WORLD_RANK:-}
native_mpi_local_rank=${OMPI_COMM_WORLD_LOCAL_RANK:-}
# Works with Slurm launching with `--mpi=pmix`
# Resolution order for both values: PMIx variable, then the Open MPI value
# captured above, then the Slurm variable, and finally 0 when none is set.
mpi_rank=${PMIX_RANK:-${native_mpi_rank:-${SLURM_PROCID:-0}}}
mpi_local_rank=${PMIX_LOCAL_RANK:-${native_mpi_local_rank:-${SLURM_LOCALID:-0}}}

# Failure counters, both starting at zero.
FAIL=0
FAIL_EXIT=0
@@ -48,8 +48,23 @@ function report_result {
4848}
4949
# Install the extra Python dependencies (diskcache and nvidia-resiliency-ext).
#
# Reads the global $mpi_local_rank (an empty/unset value is treated as 0).
# Local rank 0 performs the installation and creates a marker file only when
# every step succeeded; all other local ranks poll for that marker once per
# second, for at most 600 seconds. When the marker already exists the function
# returns immediately.
#
# Returns:
#   0         dependencies are installed (now or by an earlier call)
#   non-zero  installation failed on local rank 0, or a waiting rank timed out
function util_install_extra_dep {
    local _marker=/tmp/.nmm_extra_dep_installed
    if [[ -f "$_marker" ]]; then
        return 0
    fi

    if [[ "${mpi_local_rank:-0}" -eq 0 ]]; then
        # Declared separately from the assignment so `local` does not mask
        # the exit status of mktemp.
        local _clone_root _nvrx_dir _status=0
        if ! _clone_root="$(mktemp -d)" || [[ -z "$_clone_root" ]]; then
            echo "util_install_extra_dep: could not create a temporary directory" >&2
            return 1
        fi
        _nvrx_dir="${_clone_root}/nvidia-resiliency-ext"

        # Stop at the first failing step and remember its exit status.
        pip install diskcache \
            && git clone --depth 1 https://github.com/NVIDIA/nvidia-resiliency-ext "${_nvrx_dir}" \
            && pip install "${_nvrx_dir}" \
            || _status=$?

        # The checkout is only needed while pip builds/installs from it.
        rm -rf -- "${_clone_root}"

        if [[ $_status -ne 0 ]]; then
            # No marker on failure: a later call may retry, and waiting ranks
            # must not mistake a broken install for a finished one.
            echo "util_install_extra_dep: installation failed (exit ${_status}); marker not written" >&2
            return "$_status"
        fi
        touch "$_marker"
    else
        local _waited=0
        while [[ ! -f "$_marker" && $_waited -lt 600 ]]; do
            sleep 1
            _waited=$(( _waited + 1 ))
        done
        if [[ ! -f "$_marker" ]]; then
            echo "util_install_extra_dep: timed out after ${_waited}s waiting for ${_marker}" >&2
            return 1
        fi
    fi
}
5570
Original file line number Diff line number Diff line change 1+ # DFlash offline speculative decoding pipeline for Kimi-K2.5 (input model).
2+ # This YAML contains only task_0 (data synthesis) for the synth_support stage.
3+ # Tasks 1-3 are added by downstream stages.
4+
# Pipeline definition with a single task, task_0, which runs
# common/vllm/query.sh with the argument list below.
# NOTE(review): job_name says Kimi-K2.5 while global_vars.hf_model points at
# a Kimi-K2.6 directory — confirm which model is intended.
5+ job_name : Kimi-K2.5_DFlash_offline
6+ pipeline :
7+ allow_to_fail : false
8+ skip : false
9+ note :
10+
# hf_model is substituted into task_0's --model argument through the
# <<global_vars.hf_model>> placeholder.
11+ global_vars :
12+ hf_model : /hf-local/moonshotai/Kimi-K2.6
13+
14+ # Step 1: Data synthesis via vLLM server
15+ # Args before "--" go to vLLM server; args after "--" go to tools/query.py.
16+ task_0 :
17+ script : common/vllm/query.sh
18+ args :
19+ - --model <<global_vars.hf_model>>
# The tensor-parallel size of 8 equals gpus_per_node in slurm_config.
20+ - --tensor-parallel-size 8
21+ - --port 8000
22+ - --host 0.0.0.0
23+ - --trust_remote_code
24+ - --enforce-eager
25+ - --gpu-memory-utilization 0.95
26+ - --max-model-len 4096
# Separator between the two argument groups described above.
27+ - --
28+ - --data /hf-local/modelopt/Speculative-Decoding-Prompt-Samples
29+ - --save /scratchspace/data
30+ environment :
31+ - HF_LOCAL : /hf-local
# NOTE(review): presumably a timeout in seconds read by query.sh — confirm.
32+ - VLLM_STARTUP_TIMEOUT : " 1800"
33+ slurm_config :
34+ _factory_ : " slurm_factory"
35+ nodes : 1
36+ ntasks_per_node : 1
37+ gpus_per_node : 8
# NOTE(review): the `latest` tag is not pinned, so the image may change
# between runs — consider pinning a specific version.
38+ container : vllm/vllm-openai:latest
You can’t perform that action at this time.
0 commit comments