@@ -11,7 +11,7 @@ COPY requirements.txt .
1111RUN pip install -r requirements.txt --no-cache-dir
1212RUN pip install --no-cache-dir azureml-acft-common-components=={{latest-pypi-version}}
1313RUN pip install --no-cache-dir azureml-evaluate-mlflow=={{latest-pypi-version}}
14- RUN pip install verl==0.7.1
14+ RUN pip install --no-cache-dir verl==0.7.0
1515RUN pip install --no-cache-dir sacrebleu==2.5.1
1616COPY tracking /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/tracking.py
1717
@@ -34,15 +34,42 @@ COPY __init__ /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/rewar
3434COPY azure_grader /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/reward_score/azure_grader.py
3535COPY azure_python_grader /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/reward_score/azure_python_grader.py
3636COPY utils /opt/conda/envs/ptca/lib/python3.10/site-packages/verl/utils/vllm/utils.py
37- # vllm pinned to 0.19.1 to fix GHSA-6c4r-fmh3-7rh8 (CVE in librosa transitive dep).
38- # Root-cause analysis: librosa was vendored via vllm's `audio` extra; vllm PR #37058 removed
39- # librosa entirely. PyPI metadata confirms vllm 0.18.0 still lists `librosa; extra == "audio"`
40- # while 0.18.1+ (incl. 0.19.1) do NOT. 0.19.1 also fixes CVE-2026-7141.
41- # Parent package (verl 0.7.1) constrains `vllm<=0.12.0,>=0.8.5` only with the [vllm] extra,
42- # which is not used here; verl is installed without the extra, so we override vllm directly.
43- # Staying on the 0.19.x line (same torch==2.10.0 ABI as 0.19.0) preserves compatibility with
44- # the pinned flash-attn wheel and the verl/vLLM internal API patches in vllm_async_server,
45- # vllm_rollout, and utils. 0.20.x bumps torch to 2.11.0 and was avoided.
37+ # vllm pinned to 0.19.1 to fix:
38+ #   - GHSA-6c4r-fmh3-7rh8 (librosa transitive dep removed in vllm 0.18.1+ via PR #37058;
39+ #     PyPI metadata confirms 0.18.0 still lists `librosa; extra == "audio"` while 0.18.1+ do not)
40+ #   - CVE-2026-7141 (fixed in 0.19.1)
41+ #   - GHSA-x368-4g9h-fvv4 / VCM 5012008 (fix landed in 0.19.1)
42+ # Parent package (verl 0.7.0) constrains `vllm<=0.12.0,>=0.8.5` only via the optional [vllm]
43+ # extra, which is NOT used in this image (verl is installed without the extra); thus there is
44+ # no parent that pulls vllm — it is a direct top-level install here, and the only available
45+ # remediation path is a direct version override.
46+ #
47+ # RESIDUAL FINDING: GHSA-hpv8-x276-m59f / VCM 5012004 (multimodal token-injection DoS in vLLM's
48+ # OpenAI-compatible API server) is fixed only in vllm>=0.20.0. We are NOT upgrading to 0.20.x
49+ # in this build because the cascade has three concrete blockers verified via PyPI metadata on
50+ # 2026-05-12:
51+ #   1. sglang stack: vllm 0.20.0 requires torch==2.11.0 (exact pin); the currently pinned
52+ #      sglang==0.5.10 requires torch==2.9.1 (also exact). The minimum sglang line that allows
53+ #      torch 2.11.0 is sglang==0.5.11 (which also bumps transformers==5.6.0 and pulls a new
54+ #      sgl-kernel/torch-memory-saver matrix) — a multi-package transition.
55+ #   2. flash-attn ABI: the prebuilt wheel
56+ #      https://github.com/yeshsurya/flash-attention/releases/download/v2.8.3-linux-1/
57+ #      flash_attn-2.8.3-cp310-cp310-linux_x86_64.whl is the only asset published at that
58+ #      release tag and is built against an older torch ABI (torch 2.10 era, matching the
59+ #      torch that vllm 0.19.x resolves to); no torch 2.11 build is published there.
60+ #   3. vLLM v1-engine internal patches: the COPY'd files (vllm_async_server, vllm_rollout,
61+ #      utils) import `vllm.v1.engine.async_llm.AsyncLLM`, `vllm.v1.engine.core.EngineCoreProc`,
62+ #      `vllm.v1.engine.utils.CoreEngineProcManager`, `vllm.v1.executor.abstract.Executor`,
63+ #      `vllm.utils.argparse_utils`, `vllm.utils.network_utils`, `vllm.config.LoRAConfig`. These
64+ #      v1-engine internals frequently shift across vllm minor lines (0.19→0.20) and would
65+ #      require a full re-validation of the patches.
66+ # Risk acceptance: this image consumes vLLM internally for RFT training rollouts; it is
67+ # deployed in internal/trusted training workloads and does not expose a public OpenAI
68+ # endpoint for unauthenticated multimodal traffic, so the practical exposure of the DoS path
69+ # is limited. The override avoids a high-risk torch / sglang / flash-attn / DeepGEMM /
70+ # custom-vLLM-patch requalification in a single security bump. Re-evaluate in the next
71+ # refresh once the flash-attn wheel and the vllm_async_server/vllm_rollout patches are
72+ # updated for vllm 0.20.x (sister env acpt-grpo already runs vllm==0.20.1 successfully).
4673RUN pip install --no-cache-dir vllm==0.19.1
4774# Keep xgrammar at the patched floor even when pulled transitively by vllm.
4875RUN pip install --no-cache-dir 'xgrammar>=0.1.32'
@@ -60,13 +87,17 @@ RUN pip install https://github.com/yeshsurya/flash-attention/releases/download/v
6087# GitPython>=3.1.47: GHSA-x2qx-6953-8485, GHSA-rpm5-65cw-6hj4; transitive dep of wandb (requires
6188# gitpython!=3.1.29,>=1.0.0 as of 0.26.1), parent uses loose floor — no wandb release forces >=3.1.47
6289RUN pip install --no-cache-dir --upgrade cryptography==46.0.7 'fastmcp>=3.2.0' 'Mako>=1.3.11' 'lxml>=6.1.0' 'transformers>=5.0.0rc3' 'GitPython>=3.1.47'
63- RUN python -c "from transformers import Cache, DynamicCache, EncoderDecoderCache, PreTrainedModel; import peft; import verl.utils.model; from verl.utils.transformers_compat import get_auto_model_for_vision2seq; assert get_auto_model_for_vision2seq() is not None; print('verl-transformers compatibility ok')"
6490# python-dotenv>=1.2.2: GHSA-mf9w-mj56-hr94; transitive dep of pydantic-settings (requires >=0.21.0),
6591# uvicorn (optional, requires >=0.13), and fastmcp (requires >=1.1.0). All parents use loose floors,
6692# so no parent upgrade can force >=1.2.2. Base image ships 1.2.1 in base conda env; we patch
6793# both base (python 3.13) and ptca (python 3.10) envs to cover all install paths.
68- RUN conda run -n base python -m pip install --no-cache-dir --upgrade 'python-dotenv>=1.2.2'
69- RUN pip install --no-cache-dir --upgrade 'python-dotenv>=1.2.2'
94+ # pip>=26.1.1: GHSA-jp4c-xjxw-mgf9 / VCM 5011855 (CVE-2026-6357). Base image biweekly.202605.1
95+ # ships pip 26.0.1 in BOTH the ptca (py3.10) and base (py3.13) conda envs (per scan paths).
96+ # pip is the Python package installer itself — it is bootstrapped by the conda/python
97+ # distribution and has no parent package that pulls it in, so the only available remediation
98+ # is a direct upgrade in each conda environment. Pattern matches sister env acpt-grpo.
99+ RUN conda run -n base python -m pip install --no-cache-dir --upgrade 'python-dotenv>=1.2.2' 'pip>=26.1.1'
100+ RUN pip install --no-cache-dir --upgrade 'python-dotenv>=1.2.2' 'pip>=26.1.1'
70101# ray vendors its own copy of aiohttp inside thirdparty_files/ for runtime_env agent;
71102# the vendored copy is not upgraded by pip install above. Patching all copies in-place.
72103RUN find /opt/conda/envs/ptca/lib/python3.10/site-packages/ray -type d -name 'thirdparty_files' | while read dir; do \
0 commit comments