Skip to content

Commit f75d795

Browse files
authored
[None][fix] fix FA4 install in devel docker (#14706)
Signed-off-by: Olivia Stoner <245287810+o-stoner@users.noreply.github.com>
1 parent 1047091 commit f75d795

4 files changed

Lines changed: 19 additions & 8 deletions

File tree

docker/Dockerfile.multi

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ RUN --mount=type=bind,source=docker/common,target=/opt/docker/common \
7171
GITHUB_MIRROR=${GITHUB_MIRROR} bash /opt/docker/common/install_ucx.sh && \
7272
GITHUB_MIRROR=${GITHUB_MIRROR} bash /opt/docker/common/install_nixl.sh && \
7373
bash /opt/docker/common/install_etcd.sh && \
74+
GITHUB_MIRROR=${GITHUB_MIRROR} bash /opt/docker/common/install_fa4.sh && \
7475
rm -rf /root/.cache/uv/archive-v0 && \
7576
# WAR against https://github.com/advisories/GHSA-58pv-8j8x-9vj2
7677
rm -rf /usr/local/lib/python3.12/dist-packages/setuptools/_vendor/jaraco.context-5.3.0.dist-info && \

docker/common/install_fa4.sh

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
2+
3+
set -ex
4+
5+
FLASH_ATTN_4_VERSION="4.0.0b11"
6+
7+
if [ -n "${GITHUB_MIRROR}" ]; then
8+
export PIP_INDEX_URL="https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple"
9+
fi
10+
pip3 install "flash-attn-4==${FLASH_ATTN_4_VERSION}"

jenkins/current_image_tags.properties

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,8 @@
1313
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
1414
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
1515

16-
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-26.02-py3-x86_64-ubuntu24.04-trt10.15.1.29-skip-tritondevel-202606051544-14972
17-
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-26.02-py3-sbsa-ubuntu24.04-trt10.15.1.29-skip-tritondevel-202606051544-14972
18-
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py310-trt10.15.1.29-skip-tritondevel-202606051544-14972
19-
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py312-trt10.15.1.29-skip-tritondevel-202606051544-14972
20-
LLM_SBSA_WHEEL_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-ubuntu24.04-sbsa-ubuntu24.04-py312-trt10.15.1.29-skip-tritondevel-202606051544-14972
16+
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-26.02-py3-x86_64-ubuntu24.04-trt10.15.1.29-skip-tritondevel-202606091844-14706
17+
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-26.02-py3-sbsa-ubuntu24.04-trt10.15.1.29-skip-tritondevel-202606091844-14706
18+
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py310-trt10.15.1.29-skip-tritondevel-202606091844-14706
19+
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py312-trt10.15.1.29-skip-tritondevel-202606091844-14706
20+
LLM_SBSA_WHEEL_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-ubuntu24.04-sbsa-ubuntu24.04-py312-trt10.15.1.29-skip-tritondevel-202606091844-14706

tests/unittest/_torch/sampler/test_torch_sampler.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2173,11 +2173,11 @@ def _validate_token_frequencies(
21732173
test_expected_counts != 0, 0, test_token_counts
21742174
)
21752175
assert (test_token_counts_for_zero_prob == 0).all()
2176-
test_expected_counts_ma = np.ma.masked_array(
2176+
test_expected_counts_ma = np.ma.MaskedArray(
21772177
test_expected_counts.numpy(),
21782178
mask=(test_expected_counts.numpy() == 0),
21792179
)
2180-
test_token_counts_ma = np.ma.masked_array(
2180+
test_token_counts_ma = np.ma.MaskedArray(
21812181
test_token_counts.numpy(),
21822182
mask=test_expected_counts_ma.mask,
21832183
)
@@ -2227,7 +2227,7 @@ def _validate_token_frequencies(
22272227
prob_delta = np.where(prob_delta > 5e-2, prob_delta, 0) # NB: this is rather liberal
22282228
# bound relative differences on remaining probs
22292229
prob_delta_rel = (
2230-
np.ma.masked_array(num_samples * prob_delta, mask=test_expected_counts_ma.mask)
2230+
np.ma.MaskedArray(num_samples * prob_delta, mask=test_expected_counts_ma.mask)
22312231
/ test_expected_counts_ma.data
22322232
)
22332233
assert prob_delta_rel.max() < 0.05

0 commit comments

Comments
 (0)