diff --git a/tools/launcher/common/service_utils.sh b/tools/launcher/common/service_utils.sh
index f9d15b2798f..4fa0a63b729 100755
--- a/tools/launcher/common/service_utils.sh
+++ b/tools/launcher/common/service_utils.sh
@@ -18,8 +18,8 @@ native_mpi_rank=$OMPI_COMM_WORLD_RANK
 native_mpi_local_rank=$OMPI_COMM_WORLD_LOCAL_RANK
 
 # Works with Slurm launching with `--mpi=pmix`
-mpi_rank=${PMIX_RANK:-$native_mpi_rank}
-mpi_local_rank=${PMIX_LOCAL_RANK:-$native_mpi_local_rank}
+mpi_rank=${PMIX_RANK:-${native_mpi_rank:-${SLURM_PROCID:-0}}}
+mpi_local_rank=${PMIX_LOCAL_RANK:-${native_mpi_local_rank:-${SLURM_LOCALID:-0}}}
 
 FAIL=0
 FAIL_EXIT=0
@@ -48,8 +48,48 @@ function report_result {
 }
 
+#######################################
+# Install extra Python dependencies, guarded by a marker file in /tmp.
+# Local rank 0 installs diskcache and nvidia-resiliency-ext and then creates
+# the marker; every other local rank polls for that marker for up to 600
+# seconds. Problems are reported on stderr instead of being silently ignored.
+# Globals:   mpi_local_rank (read)
+# Arguments: none
+# Returns:   0 if the marker already exists or on non-zero local ranks (even
+#            after a timeout); on local rank 0, the status of the final touch
+#######################################
 function util_install_extra_dep {
+    local _marker=/tmp/.nmm_extra_dep_installed
+    if [[ -f "$_marker" ]]; then
+        return 0
+    fi
     if [[ "$mpi_local_rank" -eq 0 ]]; then
         pip install diskcache
+        local _nvrx_tmp _nvrx_dir
+        # Check mktemp explicitly: with an empty result the clone target would
+        # silently become /nvidia-resiliency-ext.
+        if _nvrx_tmp="$(mktemp -d)" && [[ -n "$_nvrx_tmp" ]]; then
+            _nvrx_dir="${_nvrx_tmp}/nvidia-resiliency-ext"
+            if ! git clone --depth 1 https://github.com/NVIDIA/nvidia-resiliency-ext "${_nvrx_dir}" \
+                || ! pip install "${_nvrx_dir}"; then
+                echo "WARNING: util_install_extra_dep: failed to install nvidia-resiliency-ext" >&2
+            fi
+            # A regular (non-editable) pip install copies the package, so the clone can go.
+            rm -rf -- "$_nvrx_tmp"
+        else
+            echo "WARNING: util_install_extra_dep: mktemp failed; skipping nvidia-resiliency-ext" >&2
+        fi
+        # Always signal completion so the other local ranks are released even
+        # when the optional install failed.
+        touch "$_marker"
+    else
+        local _waited=0
+        while [[ ! -f "$_marker" && $_waited -lt 600 ]]; do
+            sleep 1
+            _waited=$((_waited + 1))
+        done
+        if [[ ! -f "$_marker" ]]; then
+            echo "WARNING: util_install_extra_dep: timed out after ${_waited}s waiting for ${_marker}" >&2
+        fi
     fi
 }
 
diff --git a/tools/launcher/examples/moonshotai/Kimi-K2.5/hf_offline_dflash.yaml b/tools/launcher/examples/moonshotai/Kimi-K2.5/hf_offline_dflash.yaml
new file mode 100644
index 00000000000..179e9960308
--- /dev/null
+++ b/tools/launcher/examples/moonshotai/Kimi-K2.5/hf_offline_dflash.yaml
@@ -0,0 +1,43 @@
+# DFlash offline synthetic data generation pipeline for Kimi-K2.5.
+#
+# 1-step pipeline (task_0 only):
+#   task_0: Data synthesis — query vLLM server to generate prompt samples
+#
+# Usage:
+#   uv run launch.py --yaml examples/moonshotai/Kimi-K2.5/hf_offline_dflash.yaml --yes
+#   uv run slurm.py --yaml modules/Model-Optimizer/tools/launcher/examples/moonshotai/Kimi-K2.5/hf_offline_dflash.yaml --yes
+
+job_name: Kimi-K2.5_DFlash_offline
+pipeline:
+  allow_to_fail: false
+  skip: false
+  note:
+
+  global_vars:
+    hf_model: /hf-local/moonshotai/Kimi-K2.6
+
+  # Step 1: Data synthesis via vLLM server
+  # Args before "--" go to vllm-serve; args after "--" go to tools/query.py.
+  task_0:
+    script: common/vllm/query.sh
+    args:
+      - --model <<global_vars.hf_model>>
+      - --tensor-parallel-size 8
+      - --port 8000
+      - --host 0.0.0.0
+      - --trust-remote-code
+      - --enforce-eager
+      - --gpu-memory-utilization 0.95
+      - --max-model-len 4096
+      - --
+      - --data /nemo_run/code/modules/Model-Optimizer/examples/dataset/synthetic_conversations_1k.jsonl
+      - --save /scratchspace/data
+    environment:
+      - HF_LOCAL: /hf-local
+      - VLLM_STARTUP_TIMEOUT: "1800"
+    slurm_config:
+      _factory_: "slurm_factory"
+      nodes: 1
+      ntasks_per_node: 1
+      gpus_per_node: 8
+      container: vllm/vllm-openai:latest
\ No newline at end of file