diff --git a/.github/workflows/generate_benchmark_matrix.yml b/.github/workflows/generate_benchmark_matrix.yml
index 0092905a4ac1c..12bf7d46e04e2 100644
--- a/.github/workflows/generate_benchmark_matrix.yml
+++ b/.github/workflows/generate_benchmark_matrix.yml
@@ -44,7 +44,7 @@ jobs:
   generate:
     name: Generate Matrix (${{ inputs.workflow_type }})
     runs-on: linux-mi250-4
-    container: rocm/tensorflow-build@sha256:7fcfbd36b7ac8f6b0805b37c4248e929e31cf5ee3af766c8409dd70d5ab65faa
+    container: ${{ vars.DOCKER_IMAGE }}
     outputs:
       matrix_json_output: ${{ steps.run_generator.outputs.matrix_json }}
     defaults:
diff --git a/.github/workflows/postsubmit_benchmark.yml b/.github/workflows/postsubmit_benchmark.yml
index cf71b802e07ed..e23ecc90f0295 100644
--- a/.github/workflows/postsubmit_benchmark.yml
+++ b/.github/workflows/postsubmit_benchmark.yml
@@ -147,6 +147,15 @@ jobs:
           ref: ${{ env.CHECKOUT_REF }}
           persist-credentials: false
 
+      - name: Get RBE cluster keys
+        env:
+          RBE_CI_CERT: ${{ secrets.RBE_CI_CERT }}
+          RBE_CI_KEY: ${{ secrets.RBE_CI_KEY }}
+        run: |
+          mkdir -p /tf/certificates  # directory that receives the RBE client cert/key pair
+          printf '%s\n' "$RBE_CI_CERT" > /tf/certificates/ci-cert.crt  # printf: no echo escape/option handling
+          (umask 077 && printf '%s\n' "$RBE_CI_KEY" > /tf/certificates/ci-cert.key)  # create key owner-only
+
       - name: Build Binaries
         id: build_binaries
         run: |
diff --git a/build_tools/ci/build.py b/build_tools/ci/build.py
index 6995adf188ec6..a0c130a8be870 100755
--- a/build_tools/ci/build.py
+++ b/build_tools/ci/build.py
@@ -701,18 +701,24 @@ def nvidia_gpu_build_with_compute_capability(
 Build(
     type_=BuildType.XLA_LINUX_X86_GPU_ROCM_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS,
     repo="openxla/xla",
-    configs=("rocm_ci",),
+    configs=("rocm_ci", "rocm_rbe"),
     target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
     test_tag_filters=rocm_tag_filters,
     build_tag_filters=rocm_tag_filters,
     options={
         "run_under": "//build_tools/ci:parallel_gpu_execute",
         "//xla/tsl:ci_build": True,
+        "remote_download_toplevel": True,  # Override remote_download_minimal from rocm_rbe
+        "spawn_strategy": "local",
         **_DEFAULT_BAZEL_OPTIONS,
     },
     repo_env={
         "TF_ROCM_AMDGPU_TARGETS": "gfx90a",
+        # Tag: rocm/tensorflow-build:latest-jammy-pythonall-rocm7.2.1-ci_official
+        "TF_ROCM_RBE_DOCKER_IMAGE": "rocm/" "tensorflow-build@sha256:"
+            "66eb4c1e39db76fae2eb0a1029490acbe7bfce0e00d6ab435e170f743921f4c4",
     },
+    startup_options={"bazelrc": "build_tools/rocm/rocm_xla.bazelrc"},
     subcommand="build",
 )
 
diff --git a/xla/tools/benchmarks/registries/default_registry.yml b/xla/tools/benchmarks/registries/default_registry.yml
index c2ec56d097027..1188bbe084ff1 100644
--- a/xla/tools/benchmarks/registries/default_registry.yml
+++ b/xla/tools/benchmarks/registries/default_registry.yml
@@ -30,7 +30,7 @@ benchmarks: [
       topology: { num_hosts: 1, num_devices_per_host: 1, multi_host: false, multi_device: false }
       target_metrics: [GPU_DEVICE_TIME, GPU_DEVICE_MEMCPY_TIME]
       workflow_type: [POSTSUBMIT]
-      runtime_flags: ["--num_repeats=5", "--hlo_argument_mode=uninitialized"]
+      runtime_flags: ["--num_repeats=5"]
     }]
     update_frequency_policy: QUARTERLY
   },
@@ -48,8 +48,7 @@ benchmarks: [
       topology: { num_hosts: 1, num_devices_per_host: 1, multi_host: false, multi_device: false }
       target_metrics: [GPU_DEVICE_TIME, GPU_DEVICE_MEMCPY_TIME]
       workflow_type: [POSTSUBMIT]
-      runtime_flags: ["--num_repeats=5", "--hlo_argument_mode=uninitialized"]
-      xla_compilation_flags: ["--xla_gpu_enable_command_buffer="]
+      runtime_flags: ["--num_repeats=5"]
     }]
     update_frequency_policy: QUARTERLY
     # TODO(juliagmt): remove this label once the benchmark is stable.
diff --git a/xla/tools/benchmarks/utils/generate_benchmark_matrices.cc b/xla/tools/benchmarks/utils/generate_benchmark_matrices.cc
index 3762d99138fa0..2ba224e247bb2 100644
--- a/xla/tools/benchmarks/utils/generate_benchmark_matrices.cc
+++ b/xla/tools/benchmarks/utils/generate_benchmark_matrices.cc
@@ -130,7 +130,7 @@ GetHardwareToContainerImage() {
           {"GPU_MI250",
            "rocm/"
            "tensorflow-build@sha256:"
-           "7fcfbd36b7ac8f6b0805b37c4248e929e31cf5ee3af766c8409dd70d5ab65faa"},
+           "66eb4c1e39db76fae2eb0a1029490acbe7bfce0e00d6ab435e170f743921f4c4"},
       };
   return *kHardwareToContainerImage;
 }