Skip to content

Commit 54366c7

Browse files
Update stack to PT2.10
1 parent 8e40d06 commit 54366c7

5 files changed

Lines changed: 92 additions & 26 deletions

File tree

README.md

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,8 @@ conda activate forge
7575

7676
Notes:
7777
- Requires Intel oneAPI toolkit installed at `$ONEAPI_ROOT`, `/opt/intel/oneapi`, or loadable via `module load intel/oneapi`.
78-
- Python version must match `IPEX_PYTHON_VERSION` in `assets/versions.sh`.
79-
- The script installs PyTorch + IPEX via vLLM's XPU requirements, then locks their versions with pip constraints.
80-
- XPU builds install Monarch with `USE_TENSOR_ENGINE=0`, so RDMA and distributed tensor features are disabled for now.
78+
- Python version must match `XPU_PYTHON_VERSION` in `assets/versions.sh`.
79+
- XPU build installs Monarch with `USE_TENSOR_ENGINE=0`, so RDMA and distributed tensor features are disabled for now.
8180
- Optional flag: `--use-sudo` (system packages via `apt`/`dnf` instead of conda).
8281
- Re-activate your conda environment after install to pick up the oneAPI activation hook.
8382

assets/versions.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111
PYTORCH_VERSION="2.9.0"
1212
# ROCm/XPU builds vLLM from source (no prebuilt ROCm/XPU wheels available)
1313
VLLM_ROCM_VERSION="v0.10.0"
14-
VLLM_XPU_VERSION="v0.13.0"
15-
# IPEX wheels shipped with vLLM has hard python version requirement
16-
IPEX_PYTHON_VERSION="3.12"
14+
VLLM_XPU_VERSION="v0.17.0"
15+
# PyTorch XPU version (vLLM v0.17+ dropped IPEX in favour of native XPU support)
16+
PYTORCH_XPU_VERSION="2.10.0"
17+
# vllm-xpu-kernels wheels only ship for Python 3.12
18+
XPU_PYTHON_VERSION="3.12"
1719
TORCHSTORE_BRANCH="no-monarch-2026.01.05"
1820
# ROCm/XPU builds these from source (no ROCm/XPU wheels); CUDA uses pyproject pins.
1921
TORCHTITAN_VERSION="v0.2.0"
20-
TORCHTITAN_XPU_COMMIT="e61f2cce4fd9c54d314ff0a2dabe035b80a5d49c"
2122
MONARCH_VERSION="v0.2.0"

scripts/install_xpu.sh

Lines changed: 45 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -39,14 +39,18 @@ if [ -z "${TORCHSTORE_BRANCH:-}" ]; then
3939
log_error "TORCHSTORE_BRANCH not set in $VERSIONS_FILE"
4040
exit 1
4141
fi
42-
if [ -z "${TORCHTITAN_XPU_COMMIT:-}" ]; then
43-
log_error "TORCHTITAN_XPU_COMMIT not set in $VERSIONS_FILE"
42+
if [ -z "${TORCHTITAN_VERSION:-}" ]; then
43+
log_error "TORCHTITAN_VERSION not set in $VERSIONS_FILE"
4444
exit 1
4545
fi
4646
if [ -z "${MONARCH_VERSION:-}" ]; then
4747
log_error "MONARCH_VERSION not set in $VERSIONS_FILE"
4848
exit 1
4949
fi
50+
if [ -z "${PYTORCH_XPU_VERSION:-}" ]; then
51+
log_error "PYTORCH_XPU_VERSION not set in $VERSIONS_FILE"
52+
exit 1
53+
fi
5054

5155
# Defaults (override via environment variables)
5256
FORGE_DEPS_DIR="${FORGE_DEPS_DIR:-$HOME/.cache/torchforge}"
@@ -64,17 +68,17 @@ check_conda_env() {
6468
}
6569

6670
check_python_version() {
67-
local required="$IPEX_PYTHON_VERSION"
71+
local required="$XPU_PYTHON_VERSION"
6872
local actual
6973
actual=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
7074

7175
if [ "$actual" != "$required" ]; then
72-
log_error "Python ${actual} detected, but vLLM for XPU requires Python ${required}"
76+
log_error "Python ${actual} detected, but vllm-xpu-kernels requires Python ${required}"
7377
log_info "Recreate your conda env with the correct version:"
7478
log_info " conda create -n forge python=${required} -y"
7579
exit 1
7680
fi
77-
log_info "Python version ${actual} matches IPEX requirement"
81+
log_info "Python version ${actual} matches XPU requirement"
7882
}
7983

8084
# Check required command
@@ -260,16 +264,13 @@ ensure_rust() {
260264
create_constraints_file() {
261265
local torch_version
262266
torch_version=$(python -c "import torch; print(torch.__version__)")
263-
local ipex_version
264-
ipex_version=$(python -c "import intel_extension_for_pytorch; print(intel_extension_for_pytorch.__version__)")
265267

266268
local constraints_file="${FORGE_DEPS_DIR}/constraints.txt"
267269
cat > "$constraints_file" <<EOF
268270
torch==${torch_version}
269-
intel-extension-for-pytorch==${ipex_version}
270271
EOF
271272
export PIP_CONSTRAINT="$constraints_file"
272-
log_info "Pip constraints locked: torch==${torch_version}, IPEX==${ipex_version}"
273+
log_info "Pip constraints locked: torch==${torch_version}"
273274
}
274275

275276
install_vllm_xpu() {
@@ -278,24 +279,52 @@ install_vllm_xpu() {
278279
log_info "Installing vLLM ${VLLM_XPU_VERSION} from source (XPU)"
279280
ensure_repo "https://github.com/vllm-project/vllm.git" "$vllm_dir" "$VLLM_XPU_VERSION"
280281

281-
# Installs PyTorch + IPEX + all XPU deps
282+
# Let vLLM's xpu requirements drive the PyTorch + triton-xpu install.
282283
python -m pip install -r "${vllm_dir}/requirements/xpu.txt"
283284

284-
# Lock torch + IPEX so later installs can't clobber them
285+
# triton-xpu (required by torch 2.10+xpu) and vanilla triton (required by
286+
# xgrammar) both install into the same `triton/` namespace directory.
287+
# In PyTorch <=2.9 the XPU package was called pytorch-triton-xpu and used a
288+
# separate namespace, so the two coexisted. After the rename to triton-xpu
289+
# pip installs both, and vanilla triton's libtriton.so overwrites the XPU
290+
# one — stripping the 'intel' backend symbol.
291+
#
292+
# Fix: force-reinstall triton-xpu so its libtriton.so (with 'intel') wins.
293+
# We keep vanilla triton installed so xgrammar's pip dependency stays
294+
# satisfied (triton-xpu does not declare Provides: triton).
295+
local triton_xpu_version
296+
triton_xpu_version=$(python -c "import importlib.metadata; print(importlib.metadata.version('triton-xpu'))")
297+
log_info "Fixing triton namespace conflict: reinstalling triton-xpu ${triton_xpu_version}"
298+
python -m pip install "triton-xpu==${triton_xpu_version}" --force-reinstall --no-deps \
299+
--extra-index-url https://download.pytorch.org/whl/xpu
300+
301+
# Lock torch so later installs can't clobber it
285302
create_constraints_file
286303

287304
VLLM_TARGET_DEVICE=xpu \
288305
python -m pip install -e "$vllm_dir" --no-build-isolation
289306
}
290307

308+
verify_pytorch_xpu() {
309+
local actual_version
310+
actual_version=$(python -c "import torch; print(torch.__version__.split('+')[0])")
311+
312+
if [ "$actual_version" != "${PYTORCH_XPU_VERSION}" ]; then
313+
log_error "Expected PyTorch ${PYTORCH_XPU_VERSION} but got ${actual_version}"
314+
log_info "vLLM's requirements may have installed an incompatible version"
315+
exit 1
316+
fi
317+
log_info "PyTorch ${actual_version}+xpu verified"
318+
}
319+
291320
install_torchstore() {
292321
log_info "Installing torchstore from branch ${TORCHSTORE_BRANCH}"
293322
python -m pip install "git+https://github.com/meta-pytorch/torchstore.git@${TORCHSTORE_BRANCH}"
294323
}
295324

296325
install_torchtitan() {
297-
log_info "Installing torchtitan from tag ${TORCHTITAN_XPU_COMMIT}"
298-
python -m pip install "git+https://github.com/pytorch/torchtitan.git@${TORCHTITAN_XPU_COMMIT}"
326+
log_info "Installing torchtitan from tag ${TORCHTITAN_VERSION}"
327+
python -m pip install "git+https://github.com/pytorch/torchtitan.git@${TORCHTITAN_VERSION}"
299328
}
300329

301330
install_monarch() {
@@ -471,8 +500,9 @@ main() {
471500
install_system_packages "$USE_SUDO"
472501
setup_xpu_env
473502

474-
# vLLM install PyTorch + IPEX + creates constraints
503+
# vLLM installs PyTorch + triton-xpu, fixes triton conflict, creates constraints
475504
install_vllm_xpu
505+
verify_pytorch_xpu
476506

477507
# Everything below is protected by PIP_CONSTRAINT
478508
install_torchstore
@@ -504,4 +534,4 @@ main() {
504534
log_info " conda deactivate && conda activate $CONDA_DEFAULT_ENV"
505535
}
506536

507-
main "$@"
537+
main "$@"

src/forge/actors/vllm/v1/generator.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,21 @@ async def setup(self, host_mesh, worker_registry, gpu_ids: list[str]):
242242
"forge.actors.vllm.v1.forge_executor.ForgeMonarchExecutor"
243243
)
244244

245+
# Disable vLLM's async scheduling for our custom executor backend.
246+
# vLLM's __post_init__ is called twice: once at VllmConfig construction
247+
# and again after EngineCore handshake (_perform_handshakes). In vLLM
248+
# >= 0.14, async_scheduling defaults to None (auto-detect), which the
249+
# first __post_init__ auto-enables to True since executor is still "mp".
250+
# After we override the executor backend above, the second __post_init__
251+
# sees async_scheduling=True with an unrecognized backend and raises
252+
# ValueError. Setting False explicitly is safe for all vLLM versions:
253+
# in <= 0.13 it was already the default, and our MonarchExecutor does
254+
# not use vLLM's async scheduling mechanism.
255+
if hasattr(self.vllm_config, "scheduler_config") and hasattr(
256+
self.vllm_config.scheduler_config, "async_scheduling"
257+
):
258+
self.vllm_config.scheduler_config.async_scheduling = False
259+
245260
# Set up prefetching configuration via additional_config
246261
# There does not seem to be a real difference between passing this via an env var and passing it via self.vllm_config
247262
if self.prefetch_weights_to_shm:

src/forge/actors/vllm/v1/monarch_executor.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
from __future__ import annotations
1010

1111
import base64
12+
import inspect
1213
import logging
1314
import os
15+
from functools import partial
1416
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
1517

1618
import cloudpickle
@@ -211,11 +213,24 @@ class WorkerWrapper(WorkerWrapperBase, Actor):
211213
stores).
212214
"""
213215

216+
# Detect whether WorkerWrapperBase accepts vllm_config (vLLM <= 0.13)
217+
# or only rpc_rank/global_rank (vLLM >= 0.14).
218+
_wrapper_accepts_vllm_config: bool = (
219+
"vllm_config" in inspect.signature(WorkerWrapperBase.__init__).parameters
220+
)
221+
214222
def __init__(self, vllm_config):
215223
rank = context().actor_instance.rank.rank
216224
# rpc_rank: rank within this executor (0 to num_workers-1)
217225
# global_rank: rank in distributed group (same as rpc_rank for single executor)
218-
WorkerWrapperBase.__init__(self, vllm_config, rpc_rank=rank, global_rank=rank)
226+
if self._wrapper_accepts_vllm_config:
227+
# vLLM <= 0.13: vllm_config passed at wrapper init time
228+
WorkerWrapperBase.__init__(
229+
self, vllm_config, rpc_rank=rank, global_rank=rank
230+
)
231+
else:
232+
# vLLM >= 0.14: vllm_config flows through init_worker(all_kwargs)
233+
WorkerWrapperBase.__init__(self, rpc_rank=rank, global_rank=rank)
219234
Actor.__init__(self)
220235

221236
def init_worker(self, all_kwargs):
@@ -234,9 +249,15 @@ def init_worker(self, all_kwargs):
234249
super().init_worker(all_kwargs)
235250

236251
@endpoint
237-
def execute_method(self, method: str, *args, **kwargs):
238-
# For simplicity, we only support string method names for now
239-
fn = getattr(self, method)
252+
def execute_method(self, method, *args, **kwargs):
253+
# Support both string method names and bytes (cloudpickle'd callables,
254+
# used by vLLM >= 0.17 for lambda-based collective_rpc calls).
255+
if isinstance(method, bytes):
256+
fn = partial(cloudpickle.loads(method), self)
257+
elif isinstance(method, str):
258+
fn = getattr(self, method)
259+
else:
260+
fn = partial(method, self)
240261
return fn(*args, **kwargs)
241262

242263
@endpoint

0 commit comments

Comments
 (0)