diff --git a/.ci/scripts/test_cortex_m_e2e.sh b/.ci/scripts/test_cortex_m_e2e.sh
index 6a0a07d3ee5..c6e643f118c 100755
--- a/.ci/scripts/test_cortex_m_e2e.sh
+++ b/.ci/scripts/test_cortex_m_e2e.sh
@@ -6,76 +6,24 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# End-to-end test for Cortex-M backend: export a model via aot_arm_compiler
-# with cortex-m55+int8 target, then run the .bpte on Corstone-300 FVP.
+# CI wrapper: export a model for the Cortex-M backend and run it on the
+# Corstone-300 FVP via examples/arm/run.sh. The real work (export, runner
+# build, FVP launch, Test_result: PASS/FAIL check) is done by run.sh and
+# the run_fvp.sh it invokes.
 #
 # Usage: bash .ci/scripts/test_cortex_m_e2e.sh <model_name>
 # Example: bash .ci/scripts/test_cortex_m_e2e.sh mv2
 
-set -eux
+set -eu
 
-MODEL=$1
-mkdir -p "./cortex_m_e2e/${MODEL}"
-WORK_DIR=$(realpath "./cortex_m_e2e/${MODEL}")
+# Fail with the usage text instead of a bare "unbound variable" error.
+MODEL="${1:?Usage: bash .ci/scripts/test_cortex_m_e2e.sh <model_name>}"
+script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
+et_root_dir=$(realpath "${script_dir}/../..")
 
-echo "=== Exporting ${MODEL} with cortex-m55+int8 ==="
-python -m backends.arm.scripts.aot_arm_compiler \
-    -m "${MODEL}" \
+# Quantization is the default for the cortex-m55+int8 target; run.sh's
+# arg parser only recognizes --no_quantize, so we omit any explicit flag.
+bash "${et_root_dir}/examples/arm/run.sh" \
+    --model_name="${MODEL}" \
     --target=cortex-m55+int8 \
-    --quantize \
-    --bundleio \
-    --intermediates="${WORK_DIR}/intermediates" \
-    --output="${WORK_DIR}/${MODEL}.bpte"
-
-BPTE="${WORK_DIR}/${MODEL}.bpte"
-test -f "${BPTE}" || { echo "FAIL: ${BPTE} not produced"; exit 1; }
-echo "=== Exported ${BPTE} ($(stat --printf='%s' "${BPTE}") bytes) ==="
-
-ELF="arm_test/arm_semihosting_executor_runner_corstone-300/arm_executor_runner"
-test -f "${ELF}" || { echo "FAIL: executor runner not found at ${ELF}"; exit 1; }
-
-LOG_FILE=$(mktemp)
-
-# Create a tiny dummy input file — the runner requires -i but BundleIO
-# ignores it and uses the embedded test inputs instead.
-dd if=/dev/zero of="${WORK_DIR}/dummy.bin" bs=4 count=1 2>/dev/null
-
-echo "=== Running ${MODEL} on Corstone-300 FVP ==="
-FVP_Corstone_SSE-300_Ethos-U55 \
-    -C ethosu.num_macs=128 \
-    -C mps3_board.visualisation.disable-visualisation=1 \
-    -C mps3_board.telnetterminal0.start_telnet=0 \
-    -C mps3_board.uart0.out_file='-' \
-    -C mps3_board.uart0.shutdown_on_eot=1 \
-    -C cpu0.semihosting-enable=1 \
-    -C cpu0.semihosting-stack_base=0 \
-    -C cpu0.semihosting-heap_limit=0 \
-    -C "cpu0.semihosting-cwd=${WORK_DIR}" \
-    -C "ethosu.extra_args='--fast'" \
-    -C "cpu0.semihosting-cmd_line='executor_runner -m ${MODEL}.bpte -i dummy.bin -o out'" \
-    -a "${ELF}" \
-    --timelimit 300 2>&1 | tee "${LOG_FILE}" || true
-
-echo "=== Checking FVP output ==="
-
-if grep -q "Test_result: PASS" "${LOG_FILE}"; then
-    echo "=== SUCCESS: ${MODEL} e2e BundleIO test PASSED on FVP ==="
-    rm "${LOG_FILE}"
-    exit 0
-fi
-
-if grep -q "Test_result: FAIL" "${LOG_FILE}"; then
-    echo "FAIL: ${MODEL} BundleIO output mismatch"
-    rm "${LOG_FILE}"
-    exit 1
-fi
-
-if grep -qE "(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)" "${LOG_FILE}"; then
-    echo "FAIL: ${MODEL} FVP run hit a fatal error"
-    rm "${LOG_FILE}"
-    exit 1
-fi
-
-echo "FAIL: ${MODEL} no BundleIO test result found in FVP output"
-rm "${LOG_FILE}"
-exit 1
+    --bundleio
diff --git a/.github/workflows/_test_cortex_m_e2e.yml b/.github/workflows/_test_cortex_m_e2e.yml
index 8e7d3269912..6b0398ca998 100644
--- a/.github/workflows/_test_cortex_m_e2e.yml
+++ b/.github/workflows/_test_cortex_m_e2e.yml
@@ -43,8 +43,5 @@ jobs:
         .ci/scripts/setup-arm-baremetal-tools.sh
         source examples/arm/arm-scratch/setup_path.sh
 
-        # Build cortex-m test runner with bundled IO support
-        backends/cortex_m/test/build_test_runner.sh
-
-        # Export model and run on FVP
+        # Export the model and run it on the FVP; examples/arm/run.sh builds the cortex-m test runner itself, so no separate build step is needed.
         bash .ci/scripts/test_cortex_m_e2e.sh ${{ matrix.model }}
diff --git a/backends/arm/scripts/run_fvp.sh b/backends/arm/scripts/run_fvp.sh
index 0808c16d3c7..9f0010189af 100755
--- a/backends/arm/scripts/run_fvp.sh
+++ b/backends/arm/scripts/run_fvp.sh
@@ -19,6 +19,7 @@ _setup_msg="please refer to ${et_root_dir}/examples/arm/setup.sh to properly ins
 
 elf_file=""
 data_file=""
+bundle_file=""
 target="ethos-u55-128"
 timeout="600"
 etrecord_file=""
@@ -29,6 +30,7 @@ help() {
     echo "Options:"
     echo "  --elf=<ELF_FILE>         elf file to run"
     echo "  --data=<FILE>@<ADDRESS>  Place a file in memory at this address, useful to emulate a PTE flashed into memory instead as part of the code."
+    echo "  --bundle=<BPTE_FILE>     Bundled program (.bpte) to load via semihosting. Required for cortex-m targets; the FVP launches a semihosting executor_runner that reads the bundle from the host filesystem and checks the embedded reference outputs."
     echo "  --target=<TARGET>        Target to build and run for Default: ${target}"
     echo "  --timeout=<TIME_IN_SEC>  Maximum target runtime, used to detect hanging, might need to be higer on large models Default: ${timeout}"
     echo "  --etrecord=<FILE>        If ETDump is used you can supply a ETRecord file matching the PTE"
@@ -41,6 +43,7 @@ for arg in "$@"; do
       -h|--help) help ;;
       --elf=*) elf_file="${arg#*=}";;
       --data=*) data_file="--data ${arg#*=}";;
+      --bundle=*) bundle_file="${arg#*=}";;
       --target=*) target="${arg#*=}";;
       --timeout=*) timeout="${arg#*=}";;
       --etrecord=*) etrecord_file="${arg#*=}";;
@@ -52,7 +55,9 @@ done
 
 elf_file=$(realpath ${elf_file})
 
-if [[ ${target} == *"ethos-u55"*  ]]; then
+# cortex-m55 is the only Cortex-M CPU on the Corstone-300 board today;
+# cortex-m85 lives on Corstone-320, so it falls through to the SSE-320 FVP.
+if [[ ${target} == *"ethos-u55"* || ${target} == cortex-m55* ]]; then
     fvp_model=FVP_Corstone_SSE-300_Ethos-U55
 else
     fvp_model=FVP_Corstone_SSE-320
@@ -71,7 +76,12 @@ hash ${fvp_model} \
 
 
 [[ ! -f $elf_file ]] && { echo "[${BASH_SOURCE[0]}]: Unable to find executor_runner elf: ${elf_file}"; exit 1; }
-num_macs=$(echo ${target} | cut -d - -f 3)
+if [[ ${target} == cortex-m* ]]; then
+    # Cortex-M CPU-only; the NPU is unused but the FVP still needs a value.
+    num_macs=128
+else
+    num_macs=$(echo ${target} | cut -d - -f 3)
+fi
 
 echo "--------------------------------------------------------------------------------"
 echo "Running ${elf_file} for ${target} run with FVP:${fvp_model} num_macs:${num_macs} timeout:${timeout}"
@@ -97,7 +107,53 @@ if [[ -n "${trace_file}" ]]; then
     extra_args_u85+=(-C "mps4_board.subsystem.ethosu.extra_args=--pmu-trace ${trace_file}")
 fi
 
-if [[ ${target} == *"ethos-u55"*  ]]; then
+if [[ ${target} == cortex-m* ]]; then
+    # The launch parameters below (mps3_board.*) are the Corstone-300 ones,
+    # while the SSE-320 FVP is configured through mps4_board.* in this script,
+    # so reject Cortex-M targets that were mapped to any other FVP model.
+    [[ ${fvp_model} != FVP_Corstone_SSE-300_Ethos-U55 ]] \
+        && { echo "[${BASH_SOURCE[0]}] --target=${target} is not supported: cortex-m bundles can only run on FVP_Corstone_SSE-300_Ethos-U55 (cortex-m55)"; exit 1; }
+    [[ -z "${bundle_file}" ]] \
+        && { echo "[${BASH_SOURCE[0]}] --bundle=<BPTE_FILE> is required for cortex-m targets"; exit 1; }
+    bundle_file=$(realpath "${bundle_file}")
+    [[ ! -f "${bundle_file}" ]] \
+        && { echo "[${BASH_SOURCE[0]}] Unable to find bundled program: ${bundle_file}"; exit 1; }
+    bundle_dir=$(dirname "${bundle_file}")
+    bundle_name=$(basename "${bundle_file}")
+    # Bundled-IO runner needs -i to point at a real file even though
+    # inputs come from the bundle.
+    dd if=/dev/zero of="${bundle_dir}/fvp_dummy_input.bin" bs=4 count=1 2>/dev/null
+    # Keep the pipeline's exit status; after a trailing "|| true" the "$?"
+    # reported below would always be 0.
+    fvp_status=0
+    ${nobuf} ${fvp_model}                                              \
+        -C ethosu.num_macs=${num_macs}                                 \
+        -C mps3_board.visualisation.disable-visualisation=1            \
+        -C mps3_board.telnetterminal0.start_telnet=0                   \
+        -C mps3_board.uart0.out_file='-'                               \
+        -C mps3_board.uart0.shutdown_on_eot=1                          \
+        -C cpu0.semihosting-enable=1                                   \
+        -C cpu0.semihosting-stack_base=0                               \
+        -C cpu0.semihosting-heap_limit=0                               \
+        -C "cpu0.semihosting-cwd=${bundle_dir}"                        \
+        -C "ethosu.extra_args=--fast"                                  \
+        -C "cpu0.semihosting-cmd_line=executor_runner -m ${bundle_name} -i fvp_dummy_input.bin -o out" \
+        -a "${elf_file}"                                               \
+        --timelimit ${timeout} 2>&1 | sed 's/\r$//' | tee "${log_file}" || fvp_status=$?
+    echo "[${BASH_SOURCE[0]}] Simulation complete, ${fvp_status}"
+    # The verdict comes from the runner's Test_result line, not the exit status.
+    bundle_rc=1
+    if grep -q "Test_result: PASS" "${log_file}"; then
+        echo "[${BASH_SOURCE[0]}] Bundled I/O check PASSED for ${bundle_name}"
+        bundle_rc=0
+    elif grep -q "Test_result: FAIL" "${log_file}"; then
+        echo "[${BASH_SOURCE[0]}] Bundled I/O check FAILED for ${bundle_name}"
+    else
+        echo "[${BASH_SOURCE[0]}] No Test_result line found in FVP output for ${bundle_name}"
+    fi
+    rm "${log_file}"
+    exit ${bundle_rc}
+elif [[ ${target} == *"ethos-u55"*  ]]; then
     ${nobuf} ${fvp_model}                                   \
         -C ethosu.num_macs=${num_macs}                      \
         -C mps3_board.visualisation.disable-visualisation=1 \
diff --git a/backends/cortex_m/README.md b/backends/cortex_m/README.md
index daa2447924c..f077814d8a5 100644
--- a/backends/cortex_m/README.md
+++ b/backends/cortex_m/README.md
@@ -20,6 +20,12 @@ backends/cortex_m/test/build_test_runner.sh                               # Buil
 pytest --config-file=backends/arm/test/pytest.ini backends/cortex_m/test  # Run tests with correct configuration file
 ```
 
+For an end-to-end bundled-IO FVP run of a single model (export → build → FVP → `Test_result: PASS`), use `examples/arm/run.sh`:
+```
+examples/arm/run.sh --model_name=<model> --target=cortex-m55+int8 --bundleio
+```
+This drives `aot_arm_compiler --bundleio`, invokes `build_test_runner.sh`, and launches the Corstone-300 FVP via `backends/arm/scripts/run_fvp.sh`. The `--bundleio` flag is required: `run.sh` exits with an error for `cortex-m` targets without it.
+
 ## Supported operators
 Refer to `backends/cortex_m/test/ops` for currently supported accelerated ops/dtypes. Additionally, the quantizer targets pure "data-movement ops" such as data copies, slicing and concatenations to use quantized dtypes using the portable-kernels operator library.
 In general however, operators not supported by Cortex-M are kept in `fp32` using non-accelerated portable-kernels. It is recommended to analyze the graph after lowering to understand how much of the graph has been accelerated.
diff --git a/examples/arm/run.sh b/examples/arm/run.sh
index 9fb581d6d9b..b18115723b0 100755
--- a/examples/arm/run.sh
+++ b/examples/arm/run.sh
@@ -126,6 +126,18 @@ if [ "$perf_overlay" = true ] && [ "$model_explorer" != true ]; then
     exit 1
 fi
 
+# Cortex-M backend is an operator-library, not a delegate; force-disable
+# --delegate when targeting cortex-m so users don't need --no_delegate.
+# The cortex-m runner only loads bundled programs, so a missing --bundleio
+# is rejected here, before the per-model export/build loop starts.
+if [[ ${target} == cortex-m* ]]; then
+    aot_arm_compiler_flag_delegate=""
+    if [ "$bundleio" != true ]; then
+        echo "Error: --target=${target} requires --bundleio (the cortex-m runner loads bundled inputs via semihosting)"
+        exit 1
+    fi
+fi
+
 if ! [[ ${pte_placement} == "elf" ]]; then
     if ! [[ "$pte_placement" =~ ^0x[0-9a-fA-F]{1,16}$ ]]; then
         echo "ERROR: Placing the PTE in memory failed, address is larger then 64bit $pte_placement"
@@ -217,6 +229,10 @@ function check_setup () {
             || { echo "Executorch repo doesn't contain CMakeLists.txt file at root level"; return 1; }
 
     backends/arm/scripts/build_executorch.sh --et_build_root="${et_build_root}" --build_type=$build_type $devtools_flag $et_dump_flag --toolchain="${toolchain}"
+    elif [[ ${target} == cortex-m* ]]; then
+        # build_test_runner.sh handles toolchain setup; just validate it's on PATH.
+        hash arm-none-eabi-gcc \
+            || { echo "Could not find arm-none-eabi-gcc on PATH, ${_setup_msg}"; return 1; }
     elif [[ ${target} =~ "vgf" ]]; then
         model_converter=$(which model-converter)
         echo "${model_converter}"
@@ -347,6 +363,18 @@ for i in "${!test_model[@]}"; do
 
     if [[ ${target} == *"TOSA"*  ]]; then
         echo "Build for ${target} skip generating a .elf and running it"
+    elif [[ ${target} == cortex-m*  ]]; then
+        # Cortex-M backend uses a shared semihosting executor_runner (built
+        # by build_test_runner.sh) that loads the .bpte at runtime, rather
+        # than per-model runners with the PTE baked in.
+        # --bundleio has already been enforced for cortex-m targets above.
+        set -x
+        backends/cortex_m/test/build_test_runner.sh
+        cortex_m_elf="${et_root_dir}/arm_test/arm_semihosting_executor_runner_corstone-300/arm_executor_runner"
+        if [ "$build_only" = false ] ; then
+            backends/arm/scripts/run_fvp.sh --elf="${cortex_m_elf}" --target="${target}" --bundle="${pte_file}"
+        fi
+        set +x
     elif [[ ${target} == *"vgf"*  ]]; then
         echo "Build and run for VKML, (target: ${target})"
         set -x