Skip to content

Commit 05b0c84

Browse files
Apply suggestions from code review
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
1 parent bca0c0b commit 05b0c84

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ template <int mmq_y, bool need_check> static __device__ __forceinline__ void loa
362362
x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + dst_offset + j] = unpacked_bytes[j];
363363
#else
364364
x_qs[i*(2*MMQ_TILE_NE_K + 1) + dst_offset + j] = unpacked_bytes[j];
365-
#endif
365+
#endif // defined(AMD_MFMA_AVAILABLE) || defined(TURING_MMA_AVAILABLE) || defined(AMD_WMMA_AVAILABLE)
366366
}
367367
}
368368

@@ -383,7 +383,7 @@ template <int mmq_y, bool need_check> static __device__ __forceinline__ void loa
383383
x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + ksx] = bxi->d;
384384
#else
385385
x_df[i*(2*MMQ_TILE_NE_K/QI8_0) + i/(QI8_0/2) + ksx] = bxi->d;
386-
#endif
386+
#endif // defined(AMD_MFMA_AVAILABLE) || defined(TURING_MMA_AVAILABLE) || defined(AMD_WMMA_AVAILABLE)
387387
}
388388
}
389389

ggml/src/ggml-cuda/vecdotq.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -713,8 +713,8 @@ static __device__ __forceinline__ float vec_dot_q1_0_q8_1(
713713
}
714714

715715
// Apply Q1_0's single scale and this chunk's Q8_1 scale
716-
const float2 ds8f = __half22float2(bq8_1_chunk->ds);
717-
return d1 * ds8f.x * sumi;
716+
const float d8 = __low2float(bq8_1_chunk->ds);
717+
return d1 * d8 * sumi;
718718
}
719719

720720
static __device__ __forceinline__ float vec_dot_q4_0_q8_1(

0 commit comments

Comments (0)