Skip to content

Commit 1fd5f48

Browse files
authored
clean up unused-variable warnings (#23975)
1 parent 210a657 commit 1fd5f48

6 files changed

Lines changed: 22 additions & 28 deletions

File tree

ggml/src/ggml-cuda/fattn-mma-f16.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,6 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
568568
constexpr bool Q_in_reg = ggml_cuda_fattn_mma_get_Q_in_reg (DKQ, DV, ncols);
569569
constexpr int nstages = ggml_cuda_fattn_mma_get_nstages (DKQ, DV, ncols1, ncols2);
570570

571-
constexpr int stride_tile_Q = DKQ/2 + 4;
572571
constexpr int stride_tile_K = nbatch_K2 + 4;
573572

574573
constexpr int stride_tile_V = V_is_K_view ? stride_tile_K : nbatch_V2 + 4;
@@ -604,9 +603,9 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
604603
#pragma unroll
605604
for (int k0_start = (DKQ/2-1) - (DKQ/2-1) % nbatch_K2; k0_start >= 0; k0_start -= nbatch_K2) {
606605
const int k0_stop = k0_start + nbatch_K2 < DKQ/2 ? k0_start + nbatch_K2 : DKQ/2;
607-
const int k0_diff = k0_stop - k0_start;
608606

609607
if constexpr (nstages <= 1) {
608+
const int k0_diff = k0_stop - k0_start;
610609
constexpr bool use_cp_async = nstages == 1;
611610
flash_attn_ext_f16_load_tile<stride_tile_K, nwarps, nbatch_fa, use_cp_async, oob_check>
612611
(K_h2 + int64_t(k_VKQ_0)*stride_K + k0_start, tile_K, k0_diff, stride_K, k_VKQ_sup);
@@ -640,6 +639,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
640639
}
641640
}
642641
} else {
642+
constexpr int stride_tile_Q = DKQ/2 + 4;
643643
#pragma unroll
644644
for (int k_KQ_0 = k0_start; k_KQ_0 < k0_stop; k_KQ_0 += T_A_KQ::J) {
645645
load_ldmatrix(Q_B[0], tile_Q + (threadIdx.y / np)*(T_B_KQ::I*stride_tile_Q) + k_KQ_0, stride_tile_Q);
@@ -954,9 +954,9 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter(
954954
for (int i0_start = 0; i0_start < DV; i0_start += 2*nbatch_V2) {
955955
static_assert(DV % (2*nbatch_V2) == 0, "bad loop size");
956956
const int i0_stop = i0_start + 2*nbatch_V2;
957-
const int i0_diff = i0_stop - i0_start;
958957

959958
if constexpr (nstages <= 1) {
959+
const int i0_diff = i0_stop - i0_start;
960960
if (!V_is_K_view || i0_stop > 2*nbatch_K2) {
961961
constexpr bool use_cp_async = nstages == 1;
962962
flash_attn_ext_f16_load_tile<stride_tile_V, nwarps, nbatch_fa, use_cp_async, oob_check>

ggml/src/ggml-cuda/gated_delta_net.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ gated_delta_net_cuda(const float * q,
4343
// output state layout (per-slot D * n_seqs) — same per-(seq,head) offset as before.
4444
const int64_t state_in_offset = sequence * K * H * S_v * S_v + h_idx * S_v * S_v;
4545
const int64_t state_out_offset = (sequence * H + h_idx) * S_v * S_v;
46-
const int64_t state_size_per_token = S_v * S_v * H * n_seqs; // per-slot stride in output
4746
state += state_out_offset;
4847
curr_state += state_in_offset + col * S_v;
4948
attn_data += (sequence * n_tokens * H + h_idx) * S_v;
@@ -61,10 +60,6 @@ gated_delta_net_cuda(const float * q,
6160
s_shard[r] = curr_state[i];
6261
}
6362

64-
// slot mapping: target_slot = t - shift. When n_tokens < K only the last n_tokens slots
65-
// are written; earlier slots are left untouched (caller-owned).
66-
const int shift = (int) n_tokens - K;
67-
6863
for (int t = 0; t < n_tokens; t++) {
6964
const float * q_t = q + iq3 * sq3 + t * sq2 + iq1 * sq1;
7065
const float * k_t = k + iq3 * sq3 + t * sq2 + iq1 * sq1;
@@ -148,6 +143,11 @@ gated_delta_net_cuda(const float * q,
148143
attn_data += S_v * H;
149144

150145
if constexpr (keep_rs_t) {
146+
// slot mapping: target_slot = t - shift. When n_tokens < K only the last n_tokens slots
147+
// are written; earlier slots are left untouched (caller-owned).
148+
const int shift = (int) n_tokens - K;
149+
150+
const int64_t state_size_per_token = S_v * S_v * H * n_seqs; // per-slot stride in output
151151
const int target_slot = t - shift;
152152
if (target_slot >= 0 && target_slot < K) {
153153
float * curr_state = (dst + attn_score_elems) + target_slot * state_size_per_token + state_out_offset;

ggml/src/ggml-cuda/mmf.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ static __global__ void mul_mat_f(
9191
const int row0 = blockIdx.x * rows_per_block;
9292

9393
int expert_idx = 0;
94-
int col_base = 0;
94+
[[maybe_unused]] int col_base = 0;
9595

9696
const int channel_dst = has_ids ? 0 : blockIdx.y;
9797

@@ -122,12 +122,12 @@ static __global__ void mul_mat_f(
122122
ids += col_offset * stride_row_id;
123123
}
124124

125-
const float2 * y2 = (const float2 *) y;
125+
[[maybe_unused]] const float2 * y2 = (const float2 *) y;
126126

127127
extern __shared__ char data_mmv[];
128128

129129
char * shmem_base = data_mmv;
130-
int * slot_map = (int *) shmem_base;
130+
[[maybe_unused]] int * slot_map = (int *) shmem_base;
131131
char * compute_base = has_ids ? (shmem_base + GGML_PAD(cols_per_block, 16) * sizeof(int)) : shmem_base;
132132

133133
tile_C C[ntA][ntB];

ggml/src/ggml-cuda/mmvf.cu

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,8 @@ static __global__ void mul_mat_vec_f(
8080
gate_x += int64_t(sample_x) *stride_sample_x + channel_x *stride_channel_x + row*stride_row;
8181
}
8282

83-
const int channel_bias = ids ? channel_x : channel_dst;
84-
8583
if constexpr (has_fusion) {
84+
const int channel_bias = ids ? channel_x : channel_dst;
8685
if (use_bias) {
8786
x_bias += int64_t(sample_dst)*stride_sample_dst + channel_bias*stride_channel_dst;
8887
}
@@ -95,7 +94,7 @@ static __global__ void mul_mat_vec_f(
9594

9695
extern __shared__ char data_mmv[];
9796
float * buf_iw = (float *) data_mmv;
98-
float * buf_iw_gate = nullptr;
97+
[[maybe_unused]] float * buf_iw_gate = nullptr;
9998
if constexpr (has_fusion) {
10099
buf_iw_gate = (float *) (data_mmv + warp_size*sizeof(float));
101100
}
@@ -123,7 +122,7 @@ static __global__ void mul_mat_vec_f(
123122

124123
if constexpr (std::is_same_v<T, float>) {
125124
const float2 * x2 = (const float2 *) x;
126-
const float2 * gate_x2 = nullptr;
125+
[[maybe_unused]] const float2 * gate_x2 = nullptr;
127126
if constexpr (has_fusion) {
128127
if (use_gate) {
129128
gate_x2 = (const float2 *) gate_x;
@@ -155,7 +154,7 @@ static __global__ void mul_mat_vec_f(
155154
}
156155
} else if constexpr (std::is_same_v<T, half>) {
157156
const half2 * x2 = (const half2 *) x;
158-
const half2 * gate_x2 = nullptr;
157+
[[maybe_unused]] const half2 * gate_x2 = nullptr;
159158
if constexpr (has_fusion) {
160159
if (use_gate) {
161160
gate_x2 = (const half2 *) gate_x;
@@ -266,15 +265,15 @@ static __global__ void mul_mat_vec_f(
266265
}
267266
#else
268267
const nv_bfloat162 * x2 = (const nv_bfloat162 *) x;
269-
const nv_bfloat162 * gate_x2 = nullptr;
268+
[[maybe_unused]] const nv_bfloat162 * gate_x2 = nullptr;
270269
if constexpr (has_fusion) {
271270
if (use_gate) {
272271
gate_x2 = (const nv_bfloat162 *) gate_x;
273272
}
274273
}
275274
for (int col2 = tid; col2 < ncols2; col2 += block_size) {
276275
const nv_bfloat162 tmpx = x2[col2];
277-
nv_bfloat162 tmpx_gate;
276+
[[maybe_unused]] nv_bfloat162 tmpx_gate;
278277
if constexpr (has_fusion) {
279278
if (use_gate) {
280279
tmpx_gate = gate_x2[col2];

ggml/src/ggml-cuda/mmvq.cu

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ static __global__ void mul_mat_vec_q(
515515
bool use_gate = false;
516516
bool use_bias = false;
517517
bool use_gate_bias = false;
518-
const void * vgate = nullptr;
518+
[[maybe_unused]] const void * vgate = nullptr;
519519
const float * x_bias = nullptr;
520520
const float * gate_bias = nullptr;
521521
ggml_glu_op active_glu;
@@ -531,8 +531,8 @@ static __global__ void mul_mat_vec_q(
531531
}
532532

533533

534-
float x_biases[ncols_dst] = { 0.0f };
535-
float gate_biases[ncols_dst] = { 0.0f };
534+
[[maybe_unused]] float x_biases[ncols_dst] = { 0.0f };
535+
[[maybe_unused]] float gate_biases[ncols_dst] = { 0.0f };
536536
if constexpr (has_fusion) {
537537
const uint32_t channel_bias = ids ? channel_x : channel_dst;
538538
if (use_bias) {
@@ -589,12 +589,7 @@ static __global__ void mul_mat_vec_q(
589589
}
590590

591591
__shared__ float tmp_shared[nwarps-1 > 0 ? nwarps-1 : 1][ncols_dst][rows_per_cuda_block][warp_size];
592-
__shared__ float tmp_shared_gate[(has_fusion && (nwarps-1 > 0)) ? nwarps-1 : 1][ncols_dst][rows_per_cuda_block][warp_size];
593-
if constexpr (!has_fusion) {
594-
(void) tmp_shared_gate;
595-
} else if (!use_gate) {
596-
(void) tmp_shared_gate;
597-
}
592+
[[maybe_unused]] __shared__ float tmp_shared_gate[(has_fusion && (nwarps-1 > 0)) ? nwarps-1 : 1][ncols_dst][rows_per_cuda_block][warp_size];
598593

599594
if (threadIdx.y > 0) {
600595
#pragma unroll

ggml/src/ggml-cuda/topk-moe.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ __launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float *
134134

135135
// selection_wt is only needed when bias is present (selection uses wt + bias)
136136
// when no bias, we use wt directly for both selection and weight values
137-
float selection_wt[has_bias ? experts_per_thread : 1];
137+
[[maybe_unused]] float selection_wt[has_bias ? experts_per_thread : 1];
138138

139139
if constexpr (has_bias) {
140140
#pragma unroll

0 commit comments

Comments
 (0)