[ET-VK] Support different input layouts in q8ta_binary operator

ssjia · SS-JIA · commit 673083770696 · 2026-02-19T20:17:01.000-05:00
Previously, the q8ta_binary operator required both inputs to use the same memory layout. This was enforced by using a single `in_layout` specialization constant for both input buffers. However, some models may have inputs with different layouts (e.g., 4W4C and 4C1W) that share the same packed dimension and block size; such inputs should be compatible for binary operations.

This change introduces a separate `other_layout` specialization constant for the second input, allowing the shader to correctly load from input_b using its actual layout while input_a continues to use `in_layout`. The C++ side now passes both layout hashes as separate specialization constants to the shader.

Differential Revision: [D93768638](https://our.internmc.facebook.com/intern/diff/D93768638/)
ghstack-source-id: 342806076
Pull Request resolved: #17563
diff --git a/backends/vulkan/runtime/graph/ops/glsl/q8ta_binary.glsl b/backends/vulkan/runtime/graph/ops/glsl/q8ta_binary.glsl
@@ -46,6 +46,7 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
 ${layout_declare_spec_const(C, "int", "out_layout", "CONTIG_LAYOUT_INT")}
 ${layout_declare_spec_const(C, "int", "in_layout", "CONTIG_LAYOUT_INT")}
+${layout_declare_spec_const(C, "int", "other_layout", "CONTIG_LAYOUT_INT")}
 ${layout_declare_spec_const(C, "int", "block_config", "0")}
 
 // Generate loading functions for input buffers
@@ -71,7 +72,7 @@ void main() {
   ivec4 in_block_a = load_int8x4_block_from_t_in_a(
       in_a_meta, tidx, in_layout, block_outer_dim);
   ivec4 in_block_b = load_int8x4_block_from_t_in_b(
-      in_b_meta, tidx, in_layout, block_outer_dim);
+      in_b_meta, tidx, other_layout, block_outer_dim);
 
   ivec4 out_block;
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Q8taBinary.cpp b/backends/vulkan/runtime/graph/ops/impl/Q8taBinary.cpp
@@ -42,6 +42,7 @@ void add_q8ta_binary_node(
 
   VK_CHECK_COND(input_a_info.packed_dim == output_info.packed_dim);
   VK_CHECK_COND(input_b_info.packed_dim == output_info.packed_dim);
+
   VK_CHECK_COND(
       input_a_info.packed_dim_block_size == output_info.packed_dim_block_size);
   VK_CHECK_COND(
@@ -105,6 +106,7 @@ void add_q8ta_binary_node(
       // Specialization Constants
       {graph.hashed_layout_of(packed_int8_output),
        graph.hashed_layout_of(packed_int8_input_a),
+       graph.hashed_layout_of(packed_int8_input_b),
        block_config.as_packed_int()},
       // Resize args
       {block_config_ref},