Skip to content

Commit 658d85e

Browse files
committed
[executorch][PR] add cuda backend to backend test infra
Pull Request resolved: #17873. Integrate the CUDA backend into the backend test infra; unsupported tests are skipped for now. ghstack-source-id: 348683618 @exported-using-ghexport Differential Revision: [D93019490](https://our.internmc.facebook.com/intern/diff/D93019490/)
1 parent 38e83d1 commit 658d85e

9 files changed

Lines changed: 311 additions & 98 deletions

File tree

.ci/scripts/test_backend.sh

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,25 @@ if [[ "$FLOW" == *vulkan* ]]; then
5656
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
5757
fi
5858

59+
if [[ "$FLOW" == *cuda* ]]; then
60+
# Fix libstdc++ GLIBCXX version for CUDA backend.
61+
# The embedded .so files in the CUDA blob require GLIBCXX_3.4.30
62+
# which the default conda libstdc++ doesn't have.
63+
echo "Installing newer libstdc++ for CUDA backend..."
64+
conda install -y -c conda-forge 'libstdcxx-ng>=12'
65+
export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH:-}"
66+
67+
# CUDA backend uses the generic PyTorch test-infra Docker image (not the
68+
# custom executorch image), so PyTorch is NOT pre-installed from a pinned
69+
# commit. Install executorch with nightly PyTorch (which auto-detects
70+
# CUDA via nvcc) and then build the runner manually.
71+
echo "Installing ExecuTorch with nightly PyTorch (CUDA-enabled)..."
72+
./install_executorch.sh --editable
73+
CMAKE_ARGS="$EXTRA_BUILD_ARGS" source .ci/scripts/utils.sh
74+
build_executorch_runner cmake Release
75+
CUDA_SETUP_DONE=1
76+
fi
77+
5978
if [[ "$FLOW" == *arm* ]]; then
6079

6180
# Setup ARM deps.
@@ -78,12 +97,14 @@ if [[ "$FLOW" == *arm* ]]; then
7897
fi
7998
fi
8099

81-
if [[ $IS_MACOS -eq 1 ]]; then
82-
SETUP_SCRIPT=.ci/scripts/setup-macos.sh
83-
else
84-
SETUP_SCRIPT=.ci/scripts/setup-linux.sh
100+
if [[ "${CUDA_SETUP_DONE:-0}" != "1" ]]; then
101+
if [[ $IS_MACOS -eq 1 ]]; then
102+
SETUP_SCRIPT=.ci/scripts/setup-macos.sh
103+
else
104+
SETUP_SCRIPT=.ci/scripts/setup-linux.sh
105+
fi
106+
CMAKE_ARGS="$EXTRA_BUILD_ARGS" ${CONDA_RUN_CMD} $SETUP_SCRIPT --build-tool cmake --build-mode Release --editable true
85107
fi
86-
CMAKE_ARGS="$EXTRA_BUILD_ARGS" ${CONDA_RUN_CMD} $SETUP_SCRIPT --build-tool cmake --build-mode Release --editable true
87108

88109
GOLDEN_DIR="${ARTIFACT_DIR}/golden-artifacts"
89110
export GOLDEN_ARTIFACTS_DIR="${GOLDEN_DIR}"

.github/workflows/_test_backend.yml

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,26 @@ on:
3636
required: false
3737
type: string
3838
default: linux.4xlarge.memory
39+
docker-image:
40+
description: 'Docker image for Linux jobs'
41+
required: false
42+
type: string
43+
default: ci-image:executorch-ubuntu-22.04-clang12
44+
use-custom-docker-registry:
45+
description: 'Whether to use a custom Docker registry (set false for CUDA to use default PyTorch test-infra images)'
46+
required: false
47+
type: boolean
48+
default: true
49+
gpu-arch-type:
50+
description: 'GPU architecture type (e.g. cuda)'
51+
required: false
52+
type: string
53+
default: ''
54+
gpu-arch-version:
55+
description: 'GPU architecture version (e.g. 12.6)'
56+
required: false
57+
type: string
58+
default: ''
3959

4060
jobs:
4161
test-backend-linux:
@@ -50,7 +70,10 @@ jobs:
5070
with:
5171
ref: ${{ inputs.ref }}
5272
runner: ${{ inputs.runner-linux }}
53-
docker-image: ci-image:executorch-ubuntu-22.04-clang12
73+
docker-image: ${{ inputs.docker-image }}
74+
use-custom-docker-registry: ${{ inputs.use-custom-docker-registry }}
75+
gpu-arch-type: ${{ inputs.gpu-arch-type }}
76+
gpu-arch-version: ${{ inputs.gpu-arch-version }}
5477
submodules: recursive
5578
timeout: ${{ inputs.timeout }}
5679
upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: Test CUDA Backend
2+
3+
on:
4+
schedule:
5+
- cron: 0 2 * * *
6+
push:
7+
branches:
8+
- release/*
9+
tags:
10+
- ciflow/nightly/*
11+
pull_request:
12+
paths:
13+
- .github/workflows/test-backend-cuda.yml
14+
- .github/workflows/_test_backend.yml
15+
workflow_dispatch:
16+
17+
concurrency:
18+
group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
19+
cancel-in-progress: true
20+
21+
jobs:
22+
test-cuda:
23+
uses: ./.github/workflows/_test_backend.yml
24+
with:
25+
backend: cuda
26+
flows: '["cuda"]'
27+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
28+
timeout: 120
29+
run-linux: true
30+
runner-linux: linux.g5.4xlarge.nvidia.gpu
31+
use-custom-docker-registry: false
32+
gpu-arch-type: cuda
33+
gpu-arch-version: '12.6'

backends/cuda/test/tester.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import Any, List, Optional, Tuple
8+
9+
import executorch
10+
import executorch.backends.test.harness.stages as BaseStages
11+
import torch
12+
from executorch.backends.cuda.cuda_backend import CudaBackend
13+
from executorch.backends.cuda.cuda_partitioner import CudaPartitioner
14+
from executorch.backends.test.harness import Tester as TesterBase
15+
from executorch.backends.test.harness.stages import StageType
16+
from executorch.exir import EdgeCompileConfig
17+
from executorch.exir.backend.partitioner import Partitioner
18+
19+
20+
def _create_default_partitioner() -> CudaPartitioner:
    """Build the CudaPartitioner used when the caller supplies no partitioners.

    The partitioner receives a single compile spec: the method-name spec
    generated by ``CudaBackend`` for the ``"forward"`` method.
    """
    forward_spec = CudaBackend.generate_method_name_compile_spec("forward")
    return CudaPartitioner([forward_spec])
24+
25+
26+
class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
    """Edge transform-and-lower stage preconfigured for the CUDA backend."""

    def __init__(
        self,
        partitioners: Optional[List[Partitioner]] = None,
        edge_compile_config: Optional[EdgeCompileConfig] = None,
    ):
        """Configure the stage, filling in CUDA defaults for omitted arguments.

        Args:
            partitioners: Partitioners to lower with. When ``None``, a single
                default CUDA partitioner is created.
            edge_compile_config: Edge compile configuration. When falsy, a
                config with ``_check_ir_validity=False`` is used instead.
        """
        # Fall back to the default CUDA partitioner only when nothing was given.
        chosen_partitioners = (
            [_create_default_partitioner()] if partitioners is None else partitioners
        )
        # A missing config is replaced by one with IR validity checking disabled.
        chosen_config = edge_compile_config or EdgeCompileConfig(
            _check_ir_validity=False
        )

        super().__init__(
            default_partitioner_cls=_create_default_partitioner,
            partitioners=chosen_partitioners,
            edge_compile_config=chosen_config,
        )
43+
44+
45+
class CudaTester(TesterBase):
    """
    Tester subclass for the CUDA backend.

    Starts from the base tester's default stage classes and overrides only
    the TO_EDGE_TRANSFORM_AND_LOWER stage with the CUDA-specific
    ``ToEdgeTransformAndLower``, so models are lowered to the CUDA backend
    using AOTInductor compilation.
    """

    def __init__(
        self,
        module: torch.nn.Module,
        example_inputs: Tuple[torch.Tensor, ...],
        dynamic_shapes: Optional[Tuple[Any, ...]] = None,
    ):
        """Set up the tester for ``module`` with the CUDA lowering stage.

        Args:
            module: Model to run through the test stages.
            example_inputs: Example input tensors (any number of them),
                forwarded unchanged to the base tester.
            dynamic_shapes: Optional dynamic-shape specification, forwarded
                unchanged to the base tester.
        """
        # Dict union keeps every default stage and replaces only the lowering
        # stage; the right-hand operand wins on a key collision.
        stage_classes = TesterBase.default_stage_classes() | {
            StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
        }

        super().__init__(
            module=module,
            stage_classes=stage_classes,
            example_inputs=example_inputs,
            dynamic_shapes=dynamic_shapes,
        )

backends/test/harness/stages/serialize.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
import copy
22
import logging
3-
4-
from typing import Optional
3+
from typing import Dict, Optional
54

65
from executorch.backends.test.harness.stages.stage import Stage, StageType
76
from executorch.exir import ExecutorchProgramManager
8-
97
from torch.utils._pytree import tree_flatten
108

119
logger = logging.getLogger(__name__)
@@ -23,12 +21,15 @@
2321
class Serialize(Stage):
2422
def __init__(self):
2523
self.buffer = None
24+
self.data_files: Dict[str, bytes] = {}
2625

2726
def stage_type(self) -> StageType:
2827
return StageType.SERIALIZE
2928

3029
def run(self, artifact: ExecutorchProgramManager, inputs=None) -> None:
3130
self.buffer = artifact.buffer
31+
# Capture external data files (e.g., .ptd files for CUDA backend)
32+
self.data_files = artifact.data_files
3233

3334
@property
3435
def artifact(self) -> bytes:
@@ -40,8 +41,29 @@ def graph_module(self) -> None:
4041

4142
def run_artifact(self, inputs):
4243
inputs_flattened, _ = tree_flatten(inputs)
44+
45+
# Pick the external data file to pass as data_map_buffer.
46+
# Most backends have at most one external data file; if there are several,
47+
# only the first is used (no concatenation is performed; see the warning below).
48+
data_map_buffer = None
49+
if self.data_files:
50+
# If there's exactly one data file, use it directly
51+
# Otherwise, log a warning - multiple external files may need special handling
52+
if len(self.data_files) == 1:
53+
data_map_buffer = list(self.data_files.values())[0]
54+
else:
55+
# For multiple files, we use the first one and warn
56+
# This is a limitation - proper handling would need runtime support
57+
logger.warning(
58+
f"Multiple external data files found ({list(self.data_files.keys())}). "
59+
f"Using the first one. This may not work correctly for all backends."
60+
)
61+
data_map_buffer = list(self.data_files.values())[0]
62+
4363
executorch_module = _load_for_executorch_from_buffer(
44-
self.buffer, program_verification=Verification.Minimal
64+
self.buffer,
65+
data_map_buffer=data_map_buffer,
66+
program_verification=Verification.Minimal,
4567
)
4668
executorch_output = copy.deepcopy(
4769
executorch_module.run_method("forward", tuple(inputs_flattened))

backends/test/suite/conftest.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
import pytest
55
import torch
6-
76
from executorch.backends.test.suite.flow import all_flows
87
from executorch.backends.test.suite.reporting import _sum_op_counts
98
from executorch.backends.test.suite.runner import run_test
@@ -88,7 +87,14 @@ def lower_and_run_model(
8887
ids=str,
8988
)
9089
def test_runner(request):
91-
return TestRunner(request.param, request.node.name, request.node.originalname)
90+
flow = request.param
91+
test_name = request.node.name
92+
93+
# Check if this test should be skipped based on the flow's skip_patterns
94+
if flow.should_skip_test(test_name):
95+
pytest.skip(f"Test '{test_name}' matches skip pattern for flow '{flow.name}'")
96+
97+
return TestRunner(flow, test_name, request.node.originalname)
9298

9399

94100
@pytest.hookimpl(optionalhook=True)

0 commit comments

Comments
 (0)