pytorch · kimishpatel · Apr 6, 2026
@@ -134,6 +134,12 @@ fbcode_target(_kind = runtime.python_binary,
     ],
 )
 
+fbcode_target(_kind = runtime.python_binary,  # declares a Python binary target
+    name = "run_bench_on_device",  # NOTE(review): presumably a driver that runs the benchmarks on a device (inferred from the name) - confirm
+    srcs = ["run_bench_on_device.py"],
+    main_function = "executorch.extension.llm.custom_ops.run_bench_on_device.main",  # dotted path to the function used as the binary's entry point
+)
+
 fbcode_target(_kind = cpp_benchmark,
     name = "bench_sdpa",
     srcs = ["bench_sdpa.cpp"],

@@ -1,4 +1,4 @@
 // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

 /*
 * Benchmark for SDPA (scaled dot-product attention) implementations.
@@ -296,13 +296,14 @@
   fill_random(k, gen);
   fill_random(v, gen);
 
-  // Reference: ET custom_sdpa_out (10-param signature, standard layout)
+  // Reference: ET custom_sdpa_out (standard [B,S,H,D] layout)
   Tensor out_ref = tf.zeros(
       {(int32_t)batch, (int32_t)q_seq_len, (int32_t)Hq, (int32_t)D});
   KernelRuntimeContext ctx{};
   torch::executor::native::custom_sdpa_out(
       ctx, q, k, v, start_pos,
       std::nullopt, 0.0, true, std::nullopt, // presumably attn_mask, dropout_p, is_causal, scale - confirm against the signature
+      false, false, false, // presumably is_seq_dim_2, is_k_seq_dim_2, is_v_seq_dim_2 (as labeled at the other call site) - confirm
       out_ref);
 
   // Test: GEMM-based standard SDPA
@@ -473,6 +474,9 @@
         0.0,          // dropout_p
         true,         // is_causal
         std::nullopt, // scale
+        false,        // is_seq_dim_2
+        false,        // is_k_seq_dim_2
+        false,        // is_v_seq_dim_2
         *output_);
   }
 }