@@ -141,7 +141,7 @@ static std::string FileFormatTokenizeID(int id, FileFormat file_format)
141141 {
142142 return std::string (llama_v3_token_to_str (llama_ctx_v3, id));
143143 }
144- else if (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
144+ else if (file_format == FileFormat::GGUF_GENERIC )
145145 {
146146 return std::string (llama_token_to_str (llama_ctx_v4, id));
147147 }
@@ -153,7 +153,7 @@ static std::string FileFormatTokenizeID(int id, FileFormat file_format)
153153
154154static void TokenizeString (const std::string & str_to_tokenize, std::vector<int > & output_tokens, FileFormat file_format)
155155{
156- if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
156+ if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_GENERIC )
157157 {
158158 if (file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 )
159159 {
@@ -182,9 +182,9 @@ static int GetEosID(FileFormat file_format, int32_t n_vocab)
182182{
183183 unsigned int eosID = 0 ;
184184
185- if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
185+ if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_GENERIC )
186186 {
187- if (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
187+ if (file_format == FileFormat::GGUF_GENERIC )
188188 {
189189 eosID = llama_token_eos (&(llama_ctx_v4->model ));
190190 }
@@ -696,7 +696,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
696696 file_format = in_file_format;
697697 n_threads = kcpp_params->n_threads = inputs.threads ;
698698 n_blasthreads = kcpp_params->n_threads_batch = inputs.blasthreads ;
699- bool isGguf = (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON );
699+ bool isGguf = (file_format == FileFormat::GGUF_GENERIC );
700700
701701 n_batch = kcpp_params->n_batch = (isGguf?normalbatchsize:smallbatchsize);
702702 modelname = kcpp_params->model = inputs.model_filename ;
@@ -712,7 +712,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
712712 auto clamped_max_context_length = inputs.max_context_length ;
713713
714714 if (clamped_max_context_length>16384 &&
715- file_format != FileFormat::GGUF_LLAMA && file_format!=FileFormat::GGUF_FALCON )
715+ file_format != FileFormat::GGUF_GENERIC )
716716 {
717717 printf (" Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n " );
718718 clamped_max_context_length = 16384 ;
@@ -748,7 +748,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
748748 {
749749 // approximate NTK aware ctx
750750 auto effectivenctx = kcpp_params->n_ctx ;
751- if ((file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON ) && file_format_meta.n_ctx_train > 2048 )
751+ if ((file_format == FileFormat::GGUF_GENERIC ) && file_format_meta.n_ctx_train > 2048 )
752752 {
753753 float factor = file_format_meta.n_ctx_train /2048 ;
754754 effectivenctx = effectivenctx/factor;
@@ -781,7 +781,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
781781
782782 printf (" System Info: %s\n " , llama_print_system_info ());
783783 #if defined(GGML_USE_CUBLAS)
784- if (file_format!=FileFormat::GGUF_LLAMA && file_format!=FileFormat::GGUF_FALCON )
784+ if (file_format!=FileFormat::GGUF_GENERIC )
785785 {
786786 if (ggml_v3_cpu_has_gpublas () && cu_parseinfo_maindevice>0 )
787787 {
@@ -915,7 +915,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
915915 }
916916 return ModelLoadResult::SUCCESS;
917917 }
918- else if (file_format==FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
918+ else if (file_format==FileFormat::GGUF_GENERIC )
919919 {
920920 llama_model_params model_params = llama_model_default_params ();
921921 llama_context_params llama_ctx_params = llama_context_default_params ();
@@ -932,10 +932,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
932932 model_params.use_mmap = inputs.use_mmap ;
933933 model_params.use_mlock = inputs.use_mlock ;
934934 model_params.n_gpu_layers = inputs.gpulayers ;
935+
935936 #if defined(GGML_USE_CLBLAST)
936- if (file_format==FileFormat::GGUF_FALCON && model_params.n_gpu_layers >0 )
937+ if (file_format==FileFormat::GGUF_GENERIC && (file_format_meta. model_architecture == GGUFArch::FALCON || file_format_meta. model_architecture == GGUFArch::PHI) && model_params.n_gpu_layers >0 )
937938 {
938- printf (" \n GPU layer offload for GGUF FALCON on OpenCL is known to have issues, it has been set to 0 .\n " );
939+ printf (" \n OpenCL does not support GPU Layer offloading for this model architecture! GPU Offload has been disabled .\n " );
939940 model_params.n_gpu_layers = 0 ;
940941 }
941942 #endif
@@ -1642,13 +1643,13 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
16421643 else
16431644 {
16441645 bool triggersc = useSmartContext;
1645- if (useContextShift && (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON ))
1646+ if (useContextShift && (file_format == FileFormat::GGUF_GENERIC ))
16461647 {
16471648 PurgeMissingTokens (llama_ctx_v4, current_context_tokens, embd_inp, inputs.max_length , nctx);
16481649 triggersc = false ;
16491650 }
16501651 ContextFastForward (current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, triggersc, false );
1651- if (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
1652+ if (file_format == FileFormat::GGUF_GENERIC )
16521653 {
16531654 llama_kv_cache_seq_rm (llama_ctx_v4, 0 , n_past, -1 );
16541655 }
@@ -1669,7 +1670,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
16691670 {
16701671 // for non llama, limit to 256
16711672 int bbs = blasbatchsize;
1672- if (file_format != FileFormat::GGML && file_format != FileFormat::GGHF && file_format != FileFormat::GGJT && file_format != FileFormat::GGJT_2 && file_format != FileFormat::GGJT_3 && file_format != FileFormat::GGUF_LLAMA && file_format!=FileFormat::GGUF_FALCON )
1673+ if (file_format != FileFormat::GGML && file_format != FileFormat::GGHF && file_format != FileFormat::GGJT && file_format != FileFormat::GGJT_2 && file_format != FileFormat::GGJT_3 && file_format != FileFormat::GGUF_GENERIC )
16731674 {
16741675 bbs = (blasbatchsize > 256 ? 256 : blasbatchsize);
16751676 }
@@ -1821,7 +1822,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
18211822 {
18221823 evalres = (llama_v3_eval (llama_ctx_v3, embd.data (), embdsize, n_past, kcpp_params->n_threads )==0 );
18231824 }
1824- else if (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
1825+ else if (file_format == FileFormat::GGUF_GENERIC )
18251826 {
18261827 evalres = (llama_decode (llama_ctx_v4, llama_batch_get_one (embd.data (), embdsize, n_past, 0 ))==0 );
18271828 }
@@ -1934,9 +1935,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
19341935 float * logitsPtr;
19351936 float lowestLogit = 0 ;
19361937 int btsize = banned_token_ids.size ();
1937- if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
1938+ if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_GENERIC )
19381939 {
1939- if (file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON )
1940+ if (file_format == FileFormat::GGUF_GENERIC )
19401941 {
19411942 logitsPtr = llama_get_logits (llama_ctx_v4);
19421943 }
0 commit comments