Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions build/Dockerfile.s390x
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,23 @@ WORKDIR /workspace
# -----------------------------
# Install additional tools
# -----------------------------
RUN microdnf install -y \
RUN microdnf clean all && microdnf makecache && \
microdnf install -y \
git jq rsync wget tar \
gcc gcc-c++ gcc-gfortran make cmake \
openssl-devel libffi-devel zlib-devel libsndfile\
freetype-devel bzip2-devel xz-devel lz4-devel snappy-devel \
boost-devel libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel \
libxml2-devel libxslt-devel libwebp-devel\
openblas openblas-devel autoconf automake libtool \
numpy perl-core dnf-plugins-core libxml2 libxml2-devel libxslt libxslt-devel \
which findutils \
bc procps-ng \
gcc gcc-c++ gcc-gfortran make cmake ninja-build \
autoconf automake libtool pkg-config \
openssl-devel libffi-devel zlib-devel \
bzip2-devel xz-devel lz4-devel snappy-devel libzstd-devel \
freetype-devel boost-devel \
openblas openblas-devel \
libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel \
libxml2 libxml2-devel libxslt libxslt-devel \
ca-certificates iproute infiniband-diags \
perl-core dnf-plugins-core \
libsndfile \
&& microdnf clean all

# -----------------------------
# Python 3.12
# -----------------------------
Expand Down Expand Up @@ -101,8 +106,13 @@ RUN git clone https://github.com/apache/arrow.git && \
# Build pyarrow
# -----------------------------
RUN cd /tmp/arrow/python && \
python -m pip install -r requirements-build.txt && \
python -m pip install .
python -m pip install -r requirements-build.txt build && \
export PYARROW_BUNDLE_ARROW_CPP=1 && \
export PYARROW_WITH_DATASET=1 && \
export ARROW_HOME=/usr/local && \
export PARQUET_HOME=/usr/local && \
python -m build --wheel && \
pip install dist/*.whl

# -----------------------------
# Install inference-perf (FULL deps now possible)
Expand Down Expand Up @@ -347,7 +357,7 @@ RUN SPIECE_WHL=$(ls /tmp/sentencepiece-wheels/*.whl) && \
"$NUMBA_WHL" \
"$OPENCV_WHL" \
"$OUTLINES_WHL" \
"$SPIECE_WHL" \
"$SPIECE_WHL" \
-r requirements/cpu-build.txt \
-r requirements/cpu.txt

Expand Down
22 changes: 14 additions & 8 deletions config/scenarios/examples/spyre-s390x.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ scenario:
# -------------------------------------------------------------------------
images:
benchmark:
repository: icr.io/vopo/llm-d-benchmark
tag: latest
repository: quay.io/nayanakumari/llmd-benchmark-s390x
tag: pyarrow-newfix-v3
pullPolicy: IfNotPresent
vllm:
repository: registry.redhat.io/rhaii-early-access/vllm-spyre-rhel9
Expand Down Expand Up @@ -309,7 +309,11 @@ scenario:
# from model.*, vllmCommon.flags.*, and decode.vllm.additionalFlags.
# -------------------------------------------------------------------------
decode:
replicas: 2
replicas: 1
Comment thread
nayana-kumari marked this conversation as resolved.
tensorParallelSize: 1

accelerator:
count: 1

# Context-length-aware routing: per-pod labels (one per replica).
# Requires multinode enabled (for LWS sequential pod naming) and
Expand Down Expand Up @@ -402,7 +406,7 @@ scenario:
- name: HF_HUB_DISABLE_XET
value: "1"
- name: TORCH_SENDNN_CACHE_ENABLE
value: "0"
value: "1"
- name: TORCH_SENDNN_CACHE_DIR
value: /mnt/spyre-precompiled-model
- name: VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS
Expand Down Expand Up @@ -463,9 +467,11 @@ scenario:
workDir: "~/data/spyre"

harness:
name: inference-perf
name: guidellm
experimentProfile: sanity_random.yaml
waitTimeout: 36000
waitTimeout: 600
extraEnvVars:
- name: LD_LIBRARY_PATH
value: "/tmp/arrow/cpp/release/release"
- name: PRE_HARNESS_CMD
value: |
sed -i 's/profile:/rate-type:/g' /workspace/workload/profiles/guidellm/sanity_random.yaml
sed -i 's/max_seconds:/duration:/g' /workspace/workload/profiles/guidellm/sanity_random.yaml
Loading