diff --git a/docker/pytorch/Dockerfile.cuda b/docker/pytorch/Dockerfile.cuda
index 93effb4ff91d..96251e67b1ab 100644
--- a/docker/pytorch/Dockerfile.cuda
+++ b/docker/pytorch/Dockerfile.cuda
@@ -204,7 +204,6 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py \
 
 # Security patch — run after all installers so every OS package is covered
 RUN dnf upgrade -y --security --releasever latest \
-  && dnf upgrade -y cuda-compat-* \
   && dnf clean all
 
 # Telemetry bashrc hook — must be after security patch (dnf may replace /etc/bashrc)
@@ -268,7 +267,6 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py \
 
 # Security patch
 RUN dnf upgrade -y --security --releasever latest \
-  && dnf upgrade -y cuda-compat-* \
   && dnf clean all
 
 # Telemetry bashrc hook — must be after security patch (dnf may replace /etc/bashrc)
diff --git a/test/efa/scripts/nccl_allreduce.sh b/test/efa/scripts/nccl_allreduce.sh
index bd185bfb8169..9871e399cf69 100755
--- a/test/efa/scripts/nccl_allreduce.sh
+++ b/test/efa/scripts/nccl_allreduce.sh
@@ -52,9 +52,15 @@ check_efa_nccl_all_reduce_performance(){
     fi
 }
 
+echo "=== Pre-flight checks ==="
+echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
+ls /opt/amazon/ofi-nccl/lib64/libnccl-net* 2>/dev/null || echo "WARNING: ofi-nccl plugin not found"  # informational only: falls back to a warning
+nvidia-smi -L 2>&1 | head -2  # only the first two lines of the GPU listing
+echo "==="
+# NOTE(review): NCCL_DEBUG=WARN below hides NCCL INFO-level lines; confirm validate_all_reduce_performance_logs does not grep for them.
 echo "Running all_reduce_perf test"
 mpirun -x FI_PROVIDER="efa" -x FI_EFA_FORK_SAFE=1 -n $NODES -N $GPU_COUNT --hostfile $NUM_HOSTS_FILE \
-    -x NCCL_DEBUG=INFO ${USE_DEVICE_RDMA_ARG} -x NCCL_PROTO=simple -x NCCL_ALGO=ring -x RDMAV_FORK_SAFE=1 \
+    -x NCCL_DEBUG=WARN ${USE_DEVICE_RDMA_ARG} -x NCCL_PROTO=simple -x NCCL_ALGO=ring -x RDMAV_FORK_SAFE=1 \
     -x PATH -x LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:$LD_LIBRARY_PATH \
     -x NCCL_SOCKET_IFNAME=^lo --mca pml ^cm --mca btl tcp,self --mca btl_tcp_if_exclude lo,docker0 --bind-to none \
     /usr/local/bin/all_reduce_perf -b 8 -e 1G -f 2 -g 1 -c 1 -n 100 2>&1 | tee "${TRAINING_LOG}"
@@ -63,7 +69,10 @@ RETURN_VAL=${PIPESTATUS[0]}
 if [ ${RETURN_VAL} -eq 0 ]; then
     echo "check_efa_nccl_all_reduce passed"
 else
-    echo "check_efa_nccl_all_reduce failed"
+    echo "check_efa_nccl_all_reduce failed (exit code: ${RETURN_VAL})"
+    echo "=== Full test log ==="
+    cat "${TRAINING_LOG}"  # same output the mpirun pipeline tee'd above, repeated between markers
+    echo "=== End log ==="
 fi
 
 validate_all_reduce_performance_logs
diff --git a/test/efa/test_efa.py b/test/efa/test_efa.py
index aa06a572c0e8..4b8ddb19960a 100644
--- a/test/efa/test_efa.py
+++ b/test/efa/test_efa.py
@@ -11,11 +11,13 @@
 
 import os
 
+import pytest
 from efa.ec2_helpers import (
     DEFAULT_TIMEOUT,
     HOSTS_FILE_LOCATION,
     MASTER_CONTAINER_NAME,
     efa_instances,
+    get_efa_security_group_id,
     run_on_container,
 )
 
@@ -41,6 +43,34 @@ def test_efa_sanity_and_nccl(image_uri=IMAGE_URI):
         worker_conn,
         aws_session,
     ):
+        # Diagnostics: NCCL plugin + CUDA driver state; ';' keeps every probe running if one fails
+        diag = run_on_container(
+            MASTER_CONTAINER_NAME,
+            master_conn,
+            "echo NCCL_NET_PLUGIN=$NCCL_NET_PLUGIN; "
+            "ldconfig -p | grep libcuda 2>&1; "
+            "echo ---; "
+            "ls -la /usr/local/cuda/compat/libcuda* 2>&1; "
+            "echo ---; "
+            "cat /etc/ld.so.conf.d/cuda*.conf 2>&1; "
+            "echo ---; "
+            "nvidia-smi --query-gpu=driver_version --format=csv,noheader --id=0 2>&1; "
+            "echo ---; "
+            "nvidia-smi -L 2>&1 || true",
+        )
+        print(f"=== CUDA/NCCL diagnostics (master) ===\n{diag.stdout}")
+
+        # Dump SG rules — EFA needs an all-traffic self-referencing rule, inbound and outbound
+        sg_id = get_efa_security_group_id(aws_session)
+        sg_resp = aws_session.ec2.describe_security_groups(GroupIds=[sg_id])
+        sg = sg_resp["SecurityGroups"][0]
+        print(f"=== Security Group {sg_id} rules ===")
+        for rule in sg.get("IpPermissions", []):
+            print(f"  IN: {rule}")
+        for rule in sg.get("IpPermissionsEgress", []):
+            print(f"  OUT: {rule}")
+        print("=== End SG rules ===")
+
         # EFA sanity on master
         run_on_container(
             MASTER_CONTAINER_NAME,
@@ -48,10 +78,26 @@ def test_efa_sanity_and_nccl(image_uri=IMAGE_URI):
             "/test/efa/scripts/efa_sanity.sh",
         )
 
-        # NCCL all_reduce across 2 nodes
-        run_on_container(
+        # NCCL all_reduce across 2 nodes — capture failure details
+        result = run_on_container(
             MASTER_CONTAINER_NAME,
             master_conn,
             f"/test/efa/scripts/nccl_allreduce.sh {HOSTS_FILE_LOCATION} 2",
             timeout=DEFAULT_TIMEOUT,
+            warn=True,
         )
+        if result.failed:
+            print(f"=== NCCL allreduce FAILED (exit code {result.return_code}) ===")
+            print(f"=== stdout ===\n{result.stdout}")
+            print(f"=== stderr ===\n{result.stderr}")
+            log_dump = run_on_container(
+                MASTER_CONTAINER_NAME,
+                master_conn,
+                "cat /test/efa/logs/testEFA.log 2>&1 || echo 'Log file missing or unreadable'",
+                warn=True,  # best-effort: a failed log dump must not mask the NCCL failure
+            )
+            print(f"=== testEFA.log ===\n{log_dump.stdout}")
+            pytest.fail(
+                f"NCCL allreduce failed with exit code {result.return_code}. "
+                "See stdout above for details."
+            )
diff --git a/test/security/data/ecr_scan_allowlist/pytorch_runtime/framework_allowlist.json b/test/security/data/ecr_scan_allowlist/pytorch_runtime/framework_allowlist.json
new file mode 100644
index 000000000000..757ef73d0ea4
--- /dev/null
+++ b/test/security/data/ecr_scan_allowlist/pytorch_runtime/framework_allowlist.json
@@ -0,0 +1,7 @@
+[
+    {
+        "vulnerability_id": "CVE-2025-33219",
+        "reason": "NVIDIA display driver vulnerability in cuda-compat. Cannot upgrade cuda-compat beyond base image version (580.95) due to incompatibility with DLAMI embargo host driver (580.150). Awaiting DLAMI driver update to public 580.159+.",
+        "review_by": "2026-06-20"
+    }
+]