Skip to content

Commit 1cdcb30

Browse files
committed
another attempt to tip the scales, part 2
1 parent 8f647b7 commit 1cdcb30

1 file changed

Lines changed: 2 additions & 6 deletions

File tree

ggml/src/ggml-cuda/common.cuh

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -136,10 +136,6 @@ static int ggml_cuda_highest_compiled_arch(const int arch) {
136136
#ifndef KCPP_LIMIT_CUDA_MAX_ARCH
137137
return arch;
138138
#else
139-
if(arch==GGML_CUDA_CC_VOLTA) //fix for kcpp, if volta try return 610 instead
140-
{
141-
return GGML_CUDA_CC_DP4A;
142-
}
143139
return (arch > KCPP_LIMIT_CUDA_MAX_ARCH ? KCPP_LIMIT_CUDA_MAX_ARCH : arch);
144140
#endif
145141
}
@@ -227,7 +223,7 @@ static const char * cu_get_error_str(CUresult err) {
227223
#define FP16_AVAILABLE
228224
#endif // defined(GGML_USE_HIP) || defined(GGML_USE_MUSA) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
229225

230-
#if defined(FP16_AVAILABLE) && __CUDA_ARCH__ != 610
226+
#if defined(FP16_AVAILABLE) && __CUDA_ARCH__ > GGML_CUDA_CC_VOLTA
231227
#define FAST_FP16_AVAILABLE
232228
#endif // defined(FP16_AVAILABLE) && __CUDA_ARCH__ > GGML_CUDA_CC_VOLTA
233229

@@ -267,7 +263,7 @@ static bool fp16_available(const int cc) {
267263

268264
static bool fast_fp16_available(const int cc) {
269265
return GGML_CUDA_CC_IS_AMD(cc) ||
270-
(GGML_CUDA_CC_IS_NVIDIA(cc) && fp16_available(cc) && cc > 610) ||
266+
(GGML_CUDA_CC_IS_NVIDIA(cc) && fp16_available(cc) && cc > GGML_CUDA_CC_VOLTA) ||
271267
(GGML_CUDA_CC_IS_MTHREADS(cc) && fp16_available(cc));
272268
}
273269

0 commit comments

Comments
 (0)