diff --git a/.ci/scripts/test_cortex_m_e2e.sh b/.ci/scripts/test_cortex_m_e2e.sh
index 6a0a07d3ee5..c6e643f118c 100755
--- a/.ci/scripts/test_cortex_m_e2e.sh
+++ b/.ci/scripts/test_cortex_m_e2e.sh
@@ -6,76 +6,20 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# End-to-end test for Cortex-M backend: export a model via aot_arm_compiler
-# with cortex-m55+int8 target, then run the .bpte on Corstone-300 FVP.
-#
-# Usage: bash .ci/scripts/test_cortex_m_e2e.sh
-# Example: bash .ci/scripts/test_cortex_m_e2e.sh mv2
+# CI wrapper: export a model for the Cortex-M backend and run it on the
+# Corstone-300 FVP via examples/arm/run.sh. The real work (export, runner
+# build, FVP launch, Test_result: PASS/FAIL check) is done by run.sh and
+# the run_fvp.sh it invokes.
 
-set -eux
+set -eu
 
 MODEL=$1
-mkdir -p "./cortex_m_e2e/${MODEL}"
-WORK_DIR=$(realpath "./cortex_m_e2e/${MODEL}")
+script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
+et_root_dir=$(realpath "${script_dir}/../..")
 
-echo "=== Exporting ${MODEL} with cortex-m55+int8 ==="
-python -m backends.arm.scripts.aot_arm_compiler \
-    -m "${MODEL}" \
+# Quantization is the default for the cortex-m55+int8 target; run.sh's
+# arg parser only recognizes --no_quantize, so we omit any explicit flag.
+bash "${et_root_dir}/examples/arm/run.sh" \
+    --model_name="${MODEL}" \
     --target=cortex-m55+int8 \
-    --quantize \
-    --bundleio \
-    --intermediates="${WORK_DIR}/intermediates" \
-    --output="${WORK_DIR}/${MODEL}.bpte"
-
-BPTE="${WORK_DIR}/${MODEL}.bpte"
-test -f "${BPTE}" || { echo "FAIL: ${BPTE} not produced"; exit 1; }
-echo "=== Exported ${BPTE} ($(stat --printf='%s' "${BPTE}") bytes) ==="
-
-ELF="arm_test/arm_semihosting_executor_runner_corstone-300/arm_executor_runner"
-test -f "${ELF}" || { echo "FAIL: executor runner not found at ${ELF}"; exit 1; }
-
-LOG_FILE=$(mktemp)
-
-# Create a tiny dummy input file — the runner requires -i but BundleIO
-# ignores it and uses the embedded test inputs instead.
-dd if=/dev/zero of="${WORK_DIR}/dummy.bin" bs=4 count=1 2>/dev/null
-
-echo "=== Running ${MODEL} on Corstone-300 FVP ==="
-FVP_Corstone_SSE-300_Ethos-U55 \
-    -C ethosu.num_macs=128 \
-    -C mps3_board.visualisation.disable-visualisation=1 \
-    -C mps3_board.telnetterminal0.start_telnet=0 \
-    -C mps3_board.uart0.out_file='-' \
-    -C mps3_board.uart0.shutdown_on_eot=1 \
-    -C cpu0.semihosting-enable=1 \
-    -C cpu0.semihosting-stack_base=0 \
-    -C cpu0.semihosting-heap_limit=0 \
-    -C "cpu0.semihosting-cwd=${WORK_DIR}" \
-    -C "ethosu.extra_args='--fast'" \
-    -C "cpu0.semihosting-cmd_line='executor_runner -m ${MODEL}.bpte -i dummy.bin -o out'" \
-    -a "${ELF}" \
-    --timelimit 300 2>&1 | tee "${LOG_FILE}" || true
-
-echo "=== Checking FVP output ==="
-
-if grep -q "Test_result: PASS" "${LOG_FILE}"; then
-    echo "=== SUCCESS: ${MODEL} e2e BundleIO test PASSED on FVP ==="
-    rm "${LOG_FILE}"
-    exit 0
-fi
-
-if grep -q "Test_result: FAIL" "${LOG_FILE}"; then
-    echo "FAIL: ${MODEL} BundleIO output mismatch"
-    rm "${LOG_FILE}"
-    exit 1
-fi
-
-if grep -qE "(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)" "${LOG_FILE}"; then
-    echo "FAIL: ${MODEL} FVP run hit a fatal error"
-    rm "${LOG_FILE}"
-    exit 1
-fi
-
-echo "FAIL: ${MODEL} no BundleIO test result found in FVP output"
-rm "${LOG_FILE}"
-exit 1
+    --bundleio
diff --git a/.github/workflows/_test_cortex_m_e2e.yml b/.github/workflows/_test_cortex_m_e2e.yml
index 8e7d3269912..6b0398ca998 100644
--- a/.github/workflows/_test_cortex_m_e2e.yml
+++ b/.github/workflows/_test_cortex_m_e2e.yml
@@ -43,8 +43,5 @@ jobs:
           .ci/scripts/setup-arm-baremetal-tools.sh
           source examples/arm/arm-scratch/setup_path.sh
 
-          # Build cortex-m test runner with bundled IO support
-          backends/cortex_m/test/build_test_runner.sh
-
-          # Export model and run on FVP
+          # Export and run model on FVP (run.sh internally builds the test runner).
           bash .ci/scripts/test_cortex_m_e2e.sh ${{ matrix.model }}
diff --git a/backends/arm/scripts/run_fvp.sh b/backends/arm/scripts/run_fvp.sh
index 0808c16d3c7..9f0010189af 100755
--- a/backends/arm/scripts/run_fvp.sh
+++ b/backends/arm/scripts/run_fvp.sh
@@ -19,6 +19,7 @@ _setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly ins
 
 elf_file=""
 data_file=""
+bundle_file=""
 target="ethos-u55-128"
 timeout="600"
 etrecord_file=""
@@ -29,6 +30,7 @@ help() {
     echo "Options:"
     echo " --elf= elf file to run"
     echo " --data=@ Place a file in memory at this address, useful to emulate a PTE flashed into memory instead as part of the code."
+    echo " --bundle= Bundled program (.bpte) to load via semihosting. Required for cortex-m targets; the FVP launches a semihosting executor_runner that reads the bundle from the host filesystem and checks the embedded reference outputs."
     echo " --target= Target to build and run for Default: ${target}"
     echo " --timeout= Maximum target runtime, used to detect hanging, might need to be higer on large models Default: ${timeout}"
     echo " --etrecord= If ETDump is used you can supply a ETRecord file matching the PTE"
@@ -41,6 +43,7 @@ for arg in "$@"; do
       -h|--help) help ;;
       --elf=*) elf_file="${arg#*=}";;
       --data=*) data_file="--data ${arg#*=}";;
+      --bundle=*) bundle_file="${arg#*=}";;
       --target=*) target="${arg#*=}";;
       --timeout=*) timeout="${arg#*=}";;
       --etrecord=*) etrecord_file="${arg#*=}";;
@@ -52,7 +55,9 @@ done
 
 elf_file=$(realpath ${elf_file})
 
-if [[ ${target} == *"ethos-u55"* ]]; then
+# cortex-m55 is the only Cortex-M CPU on the Corstone-300 board today;
+# cortex-m85 lives on Corstone-320, so it falls through to the SSE-320 FVP.
+if [[ ${target} == *"ethos-u55"* || ${target} == cortex-m55* ]]; then
     fvp_model=FVP_Corstone_SSE-300_Ethos-U55
 else
     fvp_model=FVP_Corstone_SSE-320
@@ -71,7 +76,12 @@ hash ${fvp_model} \
 
 [[ ! -f $elf_file ]] && { echo "[${BASH_SOURCE[0]}]: Unable to find executor_runner elf: ${elf_file}"; exit 1; }
 
-num_macs=$(echo ${target} | cut -d - -f 3)
+if [[ ${target} == cortex-m* ]]; then
+    # Cortex-M CPU-only; the NPU is unused but the FVP still needs a value.
+    num_macs=128
+else
+    num_macs=$(echo ${target} | cut -d - -f 3)
+fi
 
 echo "--------------------------------------------------------------------------------"
 echo "Running ${elf_file} for ${target} run with FVP:${fvp_model} num_macs:${num_macs} timeout:${timeout}"
@@ -97,7 +107,50 @@ if [[ -n "${trace_file}" ]]; then
     extra_args_u85+=(-C "mps4_board.subsystem.ethosu.extra_args=--pmu-trace ${trace_file}")
 fi
 
-if [[ ${target} == *"ethos-u55"* ]]; then
+if [[ ${target} == cortex-m* ]]; then
+    [[ -z "${bundle_file}" ]] \
+        && { echo "[${BASH_SOURCE[0]}] --bundle= is required for cortex-m targets"; exit 1; }
+    bundle_file=$(realpath "${bundle_file}")
+    bundle_dir=$(dirname "${bundle_file}")
+    bundle_name=$(basename "${bundle_file}")
+    # Bundled-IO runner needs -i to point at a real file even though
+    # inputs come from the bundle.
+    dd if=/dev/zero of="${bundle_dir}/fvp_dummy_input.bin" bs=4 count=1 2>/dev/null
+    # Record the FVP's own exit status for the log line below; reading $?
+    # after `|| true` always yields 0. Grouping keeps errexit/pipefail (if
+    # enabled) from aborting before the status is captured.
+    {
+        ${nobuf} ${fvp_model} \
+            -C ethosu.num_macs=${num_macs} \
+            -C mps3_board.visualisation.disable-visualisation=1 \
+            -C mps3_board.telnetterminal0.start_telnet=0 \
+            -C mps3_board.uart0.out_file='-' \
+            -C mps3_board.uart0.shutdown_on_eot=1 \
+            -C cpu0.semihosting-enable=1 \
+            -C cpu0.semihosting-stack_base=0 \
+            -C cpu0.semihosting-heap_limit=0 \
+            -C "cpu0.semihosting-cwd=${bundle_dir}" \
+            -C "ethosu.extra_args=--fast" \
+            -C "cpu0.semihosting-cmd_line=executor_runner -m ${bundle_name} -i fvp_dummy_input.bin -o out" \
+            -a "${elf_file}" \
+            --timelimit ${timeout} 2>&1 | sed 's/\r$//' | tee "${log_file}"
+        fvp_status=${PIPESTATUS[0]}
+    } || true
+    echo "[${BASH_SOURCE[0]}] Simulation complete, ${fvp_status}"
+    if grep -q "Test_result: PASS" "${log_file}"; then
+        echo "[${BASH_SOURCE[0]}] Bundled I/O check PASSED for ${bundle_name}"
+        rm "${log_file}"
+        exit 0
+    elif grep -q "Test_result: FAIL" "${log_file}"; then
+        echo "[${BASH_SOURCE[0]}] Bundled I/O check FAILED for ${bundle_name}"
+        rm "${log_file}"
+        exit 1
+    else
+        echo "[${BASH_SOURCE[0]}] No Test_result line found in FVP output for ${bundle_name}"
+        rm "${log_file}"
+        exit 1
+    fi
+elif [[ ${target} == *"ethos-u55"* ]]; then
     ${nobuf} ${fvp_model} \
         -C ethosu.num_macs=${num_macs} \
         -C mps3_board.visualisation.disable-visualisation=1 \
diff --git a/backends/cortex_m/README.md b/backends/cortex_m/README.md
index daa2447924c..f077814d8a5 100644
--- a/backends/cortex_m/README.md
+++ b/backends/cortex_m/README.md
@@ -20,6 +20,12 @@ backends/cortex_m/test/build_test_runner.sh # Buil
 pytest --config-file=backends/arm/test/pytest.ini backends/cortex_m/test # Run tests with correct configuration file
 ```
 
+For an end-to-end bundled-IO FVP run of a single model (export → build → FVP → `Test_result: PASS`), use `examples/arm/run.sh`:
+```
+examples/arm/run.sh --model_name= --target=cortex-m55+int8 --bundleio
+```
+This drives `aot_arm_compiler --bundleio`, invokes `build_test_runner.sh`, and launches the Corstone-300 FVP via `backends/arm/scripts/run_fvp.sh`.
+
 ## Supported operators
 
 Refer to `backends/cortex_m/test/ops` for currently supported accelerated ops/dtypes. Additionally, the quantizer targets pure "data-movement ops" such as data copies, slicing and concatenations to use quantized dtypes using the portable-kernels operator library. In general however, operators not supported by Cortex-M are kept in `fp32` using non-accelerated portable-kernels. It is recommended to analyze the graph after lowering to understand how much of the graph has been accelerated.
diff --git a/examples/arm/run.sh b/examples/arm/run.sh
index 9fb581d6d9b..b18115723b0 100755
--- a/examples/arm/run.sh
+++ b/examples/arm/run.sh
@@ -126,6 +126,12 @@ if [ "$perf_overlay" = true ] && [ "$model_explorer" != true ]; then
     exit 1
 fi
 
+# Cortex-M backend is an operator-library, not a delegate; force-disable
+# --delegate when targeting cortex-m so users don't need --no_delegate.
+if [[ ${target} == cortex-m* ]]; then
+    aot_arm_compiler_flag_delegate=""
+fi
+
 if ! [[ ${pte_placement} == "elf" ]]; then
     if ! [[ "$pte_placement" =~ ^0x[0-9a-fA-F]{1,16}$ ]]; then
         echo "ERROR: Placing the PTE in memory failed, address is larger then 64bit $pte_placement"
@@ -217,6 +223,10 @@ function check_setup () {
             || { echo "Executorch repo doesn't contain CMakeLists.txt file at root level"; return 1; }
 
         backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag $et_dump_flag --toolchain="${toolchain}"
+    elif [[ ${target} == cortex-m* ]]; then
+        # build_test_runner.sh handles toolchain setup; just validate it's on PATH.
+        hash arm-none-eabi-gcc \
+            || { echo "Could not find arm-none-eabi-gcc on PATH, ${_setup_msg}"; return 1; }
     elif [[ ${target} =~ "vgf" ]]; then
         model_converter=$(which model-converter)
         echo "${model_converter}"
@@ -347,6 +357,21 @@ for i in "${!test_model[@]}"; do
 
     if [[ ${target} == *"TOSA"* ]]; then
         echo "Build for ${target} skip generating a .elf and running it"
+    elif [[ ${target} == cortex-m* ]]; then
+        # Cortex-M backend uses a shared semihosting executor_runner (built
+        # by build_test_runner.sh) that loads the .bpte at runtime, rather
+        # than per-model runners with the PTE baked in.
+        if [ "$bundleio" != true ]; then
+            echo "Error: --target=${target} requires --bundleio (the cortex-m runner loads bundled inputs via semihosting)"
+            exit 1
+        fi
+        set -x
+        backends/cortex_m/test/build_test_runner.sh
+        cortex_m_elf="${et_root_dir}/arm_test/arm_semihosting_executor_runner_corstone-300/arm_executor_runner"
+        if [ "$build_only" = false ] ; then
+            backends/arm/scripts/run_fvp.sh --elf="${cortex_m_elf}" --target="${target}" --bundle="${pte_file}"
+        fi
+        set +x
     elif [[ ${target} == *"vgf"* ]]; then
         echo "Build and run for VKML, (target: ${target})"
         set -x