Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/generate_benchmark_matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
generate:
name: Generate Matrix (${{ inputs.workflow_type }})
runs-on: linux-mi250-4
container: rocm/tensorflow-build@sha256:7fcfbd36b7ac8f6b0805b37c4248e929e31cf5ee3af766c8409dd70d5ab65faa
container: ${{ vars.DOCKER_IMAGE }}
outputs:
matrix_json_output: ${{ steps.run_generator.outputs.matrix_json }}
defaults:
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/postsubmit_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ jobs:
ref: ${{ env.CHECKOUT_REF }}
persist-credentials: false

- name: Get RBE cluster keys
env:
RBE_CI_CERT: ${{ secrets.RBE_CI_CERT }}
RBE_CI_KEY: ${{ secrets.RBE_CI_KEY }}
run: |
mkdir -p /tf/certificates
echo "$RBE_CI_CERT" > /tf/certificates/ci-cert.crt
echo "$RBE_CI_KEY" > /tf/certificates/ci-cert.key

- name: Build Binaries
id: build_binaries
run: |
Expand Down
8 changes: 7 additions & 1 deletion build_tools/ci/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,18 +701,24 @@ def nvidia_gpu_build_with_compute_capability(
Build(
type_=BuildType.XLA_LINUX_X86_GPU_ROCM_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS,
repo="openxla/xla",
configs=("rocm_ci",),
configs=("rocm_ci", "rocm_rbe"),
target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS,
test_tag_filters=rocm_tag_filters,
build_tag_filters=rocm_tag_filters,
options={
"run_under": "//build_tools/ci:parallel_gpu_execute",
"//xla/tsl:ci_build": True,
"remote_download_toplevel": True, # Override remote_download_minimal from rocm_rbe
"spawn_strategy": "local",
**_DEFAULT_BAZEL_OPTIONS,
},
repo_env={
"TF_ROCM_AMDGPU_TARGETS": "gfx90a",
"TF_ROCM_RBE_DOCKER_IMAGE": "rocm/" # rocm/tensorflow-build:latest-jammy-pythonall-rocm7.2.1-ci_official
"tensorflow-build@sha256:"
"66eb4c1e39db76fae2eb0a1029490acbe7bfce0e00d6ab435e170f743921f4c4"
Comment thread
i-chaochen marked this conversation as resolved.
},
startup_options={"bazelrc": "build_tools/rocm/rocm_xla.bazelrc"},
subcommand="build",
)

Expand Down
5 changes: 2 additions & 3 deletions xla/tools/benchmarks/registries/default_registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ benchmarks: [
topology: { num_hosts: 1, num_devices_per_host: 1, multi_host: false, multi_device: false }
target_metrics: [GPU_DEVICE_TIME, GPU_DEVICE_MEMCPY_TIME]
workflow_type: [POSTSUBMIT]
runtime_flags: ["--num_repeats=5", "--hlo_argument_mode=uninitialized"]
runtime_flags: ["--num_repeats=5"]
}]
update_frequency_policy: QUARTERLY
},
Expand All @@ -48,8 +48,7 @@ benchmarks: [
topology: { num_hosts: 1, num_devices_per_host: 1, multi_host: false, multi_device: false }
target_metrics: [GPU_DEVICE_TIME, GPU_DEVICE_MEMCPY_TIME]
workflow_type: [POSTSUBMIT]
runtime_flags: ["--num_repeats=5", "--hlo_argument_mode=uninitialized"]
Comment thread
i-chaochen marked this conversation as resolved.
xla_compilation_flags: ["--xla_gpu_enable_command_buffer="]
runtime_flags: ["--num_repeats=5"]
}]
update_frequency_policy: QUARTERLY
# TODO(juliagmt): remove this label once the benchmark is stable.
Expand Down
2 changes: 1 addition & 1 deletion xla/tools/benchmarks/utils/generate_benchmark_matrices.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ GetHardwareToContainerImage() {
{"GPU_MI250",
"rocm/"
"tensorflow-build@sha256:"
"7fcfbd36b7ac8f6b0805b37c4248e929e31cf5ee3af766c8409dd70d5ab65faa"},
"66eb4c1e39db76fae2eb0a1029490acbe7bfce0e00d6ab435e170f743921f4c4"},
};
return *kHardwareToContainerImage;
}
Expand Down
Loading