@@ -107,7 +107,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
107107 std::string arg;
108108 gpt_params default_params;
109109 const std::string arg_prefix = " --" ;
110- llama_sampling_params & sparams = params.sampling_params ;
110+ llama_sampling_params & sparams = params.sparams ;
111111
112112 for (int i = 1 ; i < argc; i++) {
113113 arg = argv[i];
@@ -241,25 +241,26 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
241241 invalid_param = true ;
242242 break ;
243243 }
244- sparams.repeat_last_n = std::stoi (argv[i]);
244+ sparams.penalty_last_n = std::stoi (argv[i]);
245+ sparams.n_prev = std::max (sparams.n_prev , sparams.penalty_last_n );
245246 } else if (arg == " --repeat-penalty" ) {
246247 if (++i >= argc) {
247248 invalid_param = true ;
248249 break ;
249250 }
250- sparams.repeat_penalty = std::stof (argv[i]);
251+ sparams.penalty_repeat = std::stof (argv[i]);
251252 } else if (arg == " --frequency-penalty" ) {
252253 if (++i >= argc) {
253254 invalid_param = true ;
254255 break ;
255256 }
256- sparams.frequency_penalty = std::stof (argv[i]);
257+ sparams.penalty_freq = std::stof (argv[i]);
257258 } else if (arg == " --presence-penalty" ) {
258259 if (++i >= argc) {
259260 invalid_param = true ;
260261 break ;
261262 }
262- sparams.presence_penalty = std::stof (argv[i]);
263+ sparams.penalty_present = std::stof (argv[i]);
263264 } else if (arg == " --mirostat" ) {
264265 if (++i >= argc) {
265266 invalid_param = true ;
@@ -572,7 +573,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
572573 invalid_param = true ;
573574 break ;
574575 }
575- params .grammar = argv[i];
576+ sparams .grammar = argv[i];
576577 } else if (arg == " --grammar-file" ) {
577578 if (++i >= argc) {
578579 invalid_param = true ;
@@ -587,7 +588,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
587588 std::copy (
588589 std::istreambuf_iterator<char >(file),
589590 std::istreambuf_iterator<char >(),
590- std::back_inserter (params .grammar )
591+ std::back_inserter (sparams .grammar )
591592 );
592593#ifndef LOG_DISABLE_LOGS
593594 // Parse args for logging parameters
@@ -631,6 +632,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
631632 process_escapes (params.prompt );
632633 process_escapes (params.input_prefix );
633634 process_escapes (params.input_suffix );
635+ process_escapes (sparams.cfg_negative_prompt );
634636 for (auto & antiprompt : params.antiprompt ) {
635637 process_escapes (antiprompt);
636638 }
@@ -640,7 +642,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
640642}
641643
642644void gpt_print_usage (int /* argc*/ , char ** argv, const gpt_params & params) {
643- const llama_sampling_params & sparams = params.sampling_params ;
645+ const llama_sampling_params & sparams = params.sparams ;
644646
645647 printf (" usage: %s [options]\n " , argv[0 ]);
646648 printf (" \n " );
@@ -678,10 +680,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
678680 printf (" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n " , (double )sparams.top_p );
679681 printf (" --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n " , (double )sparams.tfs_z );
680682 printf (" --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n " , (double )sparams.typical_p );
681- printf (" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n " , sparams.repeat_last_n );
682- printf (" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n " , (double )sparams.repeat_penalty );
683- printf (" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.presence_penalty );
684- printf (" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.frequency_penalty );
683+ printf (" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n " , sparams.penalty_last_n );
684+ printf (" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n " , (double )sparams.penalty_repeat );
685+ printf (" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.penalty_present );
686+ printf (" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.penalty_freq );
685687 printf (" --mirostat N use Mirostat sampling.\n " );
686688 printf (" Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n " );
687689 printf (" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n " , sparams.mirostat );
@@ -878,13 +880,13 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
878880 }
879881
880882 if (params.ignore_eos ) {
881- params.sampling_params .logit_bias [llama_token_eos (lctx )] = -INFINITY;
883+ params.sparams .logit_bias [llama_token_eos (model )] = -INFINITY;
882884 }
883885
884886 {
885887 LOG (" warming up the model with an empty run\n " );
886888
887- std::vector<llama_token> tmp = { llama_token_bos (lctx ), llama_token_eos (lctx ), };
889+ std::vector<llama_token> tmp = { llama_token_bos (model ), llama_token_eos (model ), };
888890 llama_decode (lctx, llama_batch_get_one (tmp.data (), std::min (tmp.size (), (size_t ) params.n_batch ), 0 , 0 ));
889891 llama_kv_cache_tokens_rm (lctx, -1 , -1 );
890892 llama_reset_timings (lctx);
@@ -939,7 +941,7 @@ std::string llama_token_to_piece(const struct llama_context * ctx, llama_token t
939941}
940942
941943std::string llama_detokenize_spm (llama_context * ctx, const std::vector<llama_token> & tokens) {
942- const llama_token bos_id = llama_token_bos (ctx);
944+ const llama_token bos_id = llama_token_bos (llama_get_model ( ctx) );
943945
944946 std::string piece;
945947 std::string result;
@@ -1123,28 +1125,28 @@ std::string get_sortable_timestamp() {
11231125
11241126void dump_non_result_info_yaml (FILE * stream, const gpt_params & params, const llama_context * lctx,
11251127 const std::string & timestamp, const std::vector<int > & prompt_tokens, const char * model_desc) {
1126- const llama_sampling_params & sparams = params.sampling_params ;
1128+ const llama_sampling_params & sparams = params.sparams ;
11271129
11281130 fprintf (stream, " build_commit: %s\n " , BUILD_COMMIT);
11291131 fprintf (stream, " build_number: %d\n " , BUILD_NUMBER);
1130- fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
1131- fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
1132- fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
1133- fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
1132+ fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
1133+ fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
1134+ fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
1135+ fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
11341136 fprintf (stream, " cpu_has_avx512_vbmi: %s\n " , ggml_cpu_has_avx512_vbmi () ? " true" : " false" );
11351137 fprintf (stream, " cpu_has_avx512_vnni: %s\n " , ggml_cpu_has_avx512_vnni () ? " true" : " false" );
1136- fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1137- fprintf (stream, " cpu_has_cublas: %s\n " , ggml_cpu_has_cublas () ? " true" : " false" );
1138- fprintf (stream, " cpu_has_clblast: %s\n " , ggml_cpu_has_clblast () ? " true" : " false" );
1139- fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
1140- fprintf (stream, " cpu_has_gpublas: %s\n " , ggml_cpu_has_gpublas () ? " true" : " false" );
1141- fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
1142- fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
1143- fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
1144- fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
1145- fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1146- fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
1147- fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
1138+ fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1139+ fprintf (stream, " cpu_has_cublas: %s\n " , ggml_cpu_has_cublas () ? " true" : " false" );
1140+ fprintf (stream, " cpu_has_clblast: %s\n " , ggml_cpu_has_clblast () ? " true" : " false" );
1141+ fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
1142+ fprintf (stream, " cpu_has_gpublas: %s\n " , ggml_cpu_has_gpublas () ? " true" : " false" );
1143+ fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
1144+ fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
1145+ fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
1146+ fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
1147+ fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1148+ fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
1149+ fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
11481150
11491151#ifdef NDEBUG
11501152 fprintf (stream, " debug: false\n " );
@@ -1178,13 +1180,13 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
11781180 fprintf (stream, " ctx_size: %d # default: 512\n " , params.n_ctx );
11791181 fprintf (stream, " escape: %s # default: false\n " , params.escape ? " true" : " false" );
11801182 fprintf (stream, " file: # never logged, see prompt instead. Can still be specified for input.\n " );
1181- fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , sparams.frequency_penalty );
1182- dump_string_yaml_multiline (stream, " grammar" , params .grammar .c_str ());
1183+ fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , sparams.penalty_freq );
1184+ dump_string_yaml_multiline (stream, " grammar" , sparams .grammar .c_str ());
11831185 fprintf (stream, " grammar-file: # never logged, see grammar instead. Can still be specified for input.\n " );
11841186 fprintf (stream, " hellaswag: %s # default: false\n " , params.hellaswag ? " true" : " false" );
11851187 fprintf (stream, " hellaswag_tasks: %zu # default: 400\n " , params.hellaswag_tasks );
11861188
1187- const auto logit_bias_eos = sparams.logit_bias .find (llama_token_eos (lctx));
1189+ const auto logit_bias_eos = sparams.logit_bias .find (llama_token_eos (llama_get_model ( lctx) ));
11881190 const bool ignore_eos = logit_bias_eos != sparams.logit_bias .end () && logit_bias_eos->second == -INFINITY;
11891191 fprintf (stream, " ignore_eos: %s # default: false\n " , ignore_eos ? " true" : " false" );
11901192
@@ -1238,14 +1240,14 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
12381240 fprintf (stream, " numa: %s # default: false\n " , params.numa ? " true" : " false" );
12391241 fprintf (stream, " ppl_output_type: %d # default: 0\n " , params.ppl_output_type );
12401242 fprintf (stream, " ppl_stride: %d # default: 0\n " , params.ppl_stride );
1241- fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.presence_penalty );
1243+ fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.penalty_present );
12421244 dump_string_yaml_multiline (stream, " prompt" , params.prompt .c_str ());
12431245 fprintf (stream, " prompt_cache: %s\n " , params.path_prompt_cache .c_str ());
12441246 fprintf (stream, " prompt_cache_all: %s # default: false\n " , params.prompt_cache_all ? " true" : " false" );
12451247 fprintf (stream, " prompt_cache_ro: %s # default: false\n " , params.prompt_cache_ro ? " true" : " false" );
12461248 dump_vector_int_yaml (stream, " prompt_tokens" , prompt_tokens);
12471249 fprintf (stream, " random_prompt: %s # default: false\n " , params.random_prompt ? " true" : " false" );
1248- fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.repeat_penalty );
1250+ fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.penalty_repeat );
12491251
12501252 fprintf (stream, " reverse_prompt:\n " );
12511253 for (std::string ap : params.antiprompt ) {
0 commit comments