@@ -57,6 +57,15 @@ int main(int argc, char ** argv) {
5757 const std::string prefill_policy = slice_between (fattn,
5858 " static inline bool ggml_cuda_fattn_prefill_mma_can_materialize_turbo_k_classic_v" ,
5959 " // Shape guard for the effective K/V pair after Turbo V decode-dequant." );
60+ const std::string classic_non_q8 = slice_between (fattn,
61+ " static inline bool ggml_cuda_fattn_is_classic_non_q8_type" ,
62+ " static void ggml_cuda_fattn_materialize_to_f16" );
63+ const std::string unsafe_k_helper = slice_between (fattn,
64+ " static inline bool ggml_cuda_fattn_is_turbo_v_decode_unsafe_k_type" ,
65+ " static inline bool ggml_cuda_fattn_effective_vec_shape_unsafe" );
66+ const std::string unsafe_shape = slice_between (fattn,
67+ " static inline bool ggml_cuda_fattn_effective_vec_shape_unsafe" ,
68+ " static void ggml_cuda_flash_attn_ext_vec" );
6069 const std::string exec = slice_between (fattn,
6170 " void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst)" ,
6271 " bool ggml_cuda_flash_attn_ext_support" );
@@ -93,5 +102,17 @@ int main(int argc, char ** argv) {
93102 prefill_policy.find (" ggml_cuda_fattn_is_classic_non_q8_type(V->type)" ) != std::string::npos,
94103 " Turbo K + classic V prefill eligibility must not broaden classic-K/Turbo-V routing" );
95104
105+ ok &= expect (!classic_non_q8.empty () &&
106+ classic_non_q8.find (" GGML_TYPE_Q8_0" ) == std::string::npos,
107+ " classic non-q8 helper must not be broadened to include q8_0" );
108+ ok &= expect (!unsafe_k_helper.empty () &&
109+ unsafe_k_helper.find (" GGML_TYPE_Q8_0" ) != std::string::npos &&
110+ unsafe_k_helper.find (" ggml_cuda_fattn_is_classic_non_q8_type(type)" ) != std::string::npos,
111+ " Turbo V decode unsafe-K policy must cover q8_0 plus classic non-q8 K types" );
112+ ok &= expect (!unsafe_shape.empty () &&
113+ unsafe_shape.find (" ggml_cuda_fattn_is_turbo_v_decode_unsafe_k_type(K->type)" ) != std::string::npos &&
114+ unsafe_shape.find (" V->type == GGML_TYPE_F16" ) != std::string::npos,
115+ " Turbo V decode shape guard must use the unsafe-K policy for effective f16 V" );
116+
96117 return ok ? 0 : 1 ;
97118}
0 commit comments