Skip to content

Commit 5db89b9

Browse files
committed
Merge branch 'master' into concedo_experimental
# Conflicts: # .gitignore # CMakeLists.txt # Makefile # README.md # build.zig # ggml-opencl.cpp # tests/CMakeLists.txt # tests/test-double-float.cpp # tests/test-sampling.cpp
2 parents 98d1dba + 6961c4b commit 5db89b9

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+5340
-7739
lines changed
Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
---
2-
name: Issue and enhancement template
3-
about: Used to report issues and request enhancements for llama.cpp
4-
title: "[User] Insert summary of your issue or enhancement.."
5-
labels: ''
2+
name: Bug template
3+
about: Used to report bugs in llama.cpp
4+
labels: ["bug"]
65
assignees: ''
76

87
---
@@ -46,7 +45,7 @@ $ g++ --version
4645

4746
# Failure Information (for bugs)
4847

49-
Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template.
48+
Please help provide information about the failure / bug.
5049

5150
# Steps to Reproduce
5251

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
---
2+
name: Enhancement template
3+
about: Used to request enhancements for llama.cpp
4+
labels: ["enhancement"]
5+
assignees: ''
6+
7+
---
8+
9+
# Prerequisites
10+
11+
Please answer the following questions for yourself before submitting an issue.
12+
13+
- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now.
14+
- [ ] I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
15+
- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed).
16+
- [ ] I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new bug or useful enhancement to share.
17+
18+
# Feature Description
19+
20+
Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
21+
22+
# Motivation
23+
24+
Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
25+
26+
# Possible Implementation
27+
28+
If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.

common/common.cpp

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
107107
std::string arg;
108108
gpt_params default_params;
109109
const std::string arg_prefix = "--";
110-
llama_sampling_params & sparams = params.sampling_params;
110+
llama_sampling_params & sparams = params.sparams;
111111

112112
for (int i = 1; i < argc; i++) {
113113
arg = argv[i];
@@ -241,25 +241,26 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
241241
invalid_param = true;
242242
break;
243243
}
244-
sparams.repeat_last_n = std::stoi(argv[i]);
244+
sparams.penalty_last_n = std::stoi(argv[i]);
245+
sparams.n_prev = std::max(sparams.n_prev, sparams.penalty_last_n);
245246
} else if (arg == "--repeat-penalty") {
246247
if (++i >= argc) {
247248
invalid_param = true;
248249
break;
249250
}
250-
sparams.repeat_penalty = std::stof(argv[i]);
251+
sparams.penalty_repeat = std::stof(argv[i]);
251252
} else if (arg == "--frequency-penalty") {
252253
if (++i >= argc) {
253254
invalid_param = true;
254255
break;
255256
}
256-
sparams.frequency_penalty = std::stof(argv[i]);
257+
sparams.penalty_freq = std::stof(argv[i]);
257258
} else if (arg == "--presence-penalty") {
258259
if (++i >= argc) {
259260
invalid_param = true;
260261
break;
261262
}
262-
sparams.presence_penalty = std::stof(argv[i]);
263+
sparams.penalty_present = std::stof(argv[i]);
263264
} else if (arg == "--mirostat") {
264265
if (++i >= argc) {
265266
invalid_param = true;
@@ -572,7 +573,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
572573
invalid_param = true;
573574
break;
574575
}
575-
params.grammar = argv[i];
576+
sparams.grammar = argv[i];
576577
} else if (arg == "--grammar-file") {
577578
if (++i >= argc) {
578579
invalid_param = true;
@@ -587,7 +588,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
587588
std::copy(
588589
std::istreambuf_iterator<char>(file),
589590
std::istreambuf_iterator<char>(),
590-
std::back_inserter(params.grammar)
591+
std::back_inserter(sparams.grammar)
591592
);
592593
#ifndef LOG_DISABLE_LOGS
593594
// Parse args for logging parameters
@@ -631,6 +632,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
631632
process_escapes(params.prompt);
632633
process_escapes(params.input_prefix);
633634
process_escapes(params.input_suffix);
635+
process_escapes(sparams.cfg_negative_prompt);
634636
for (auto & antiprompt : params.antiprompt) {
635637
process_escapes(antiprompt);
636638
}
@@ -640,7 +642,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
640642
}
641643

642644
void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
643-
const llama_sampling_params & sparams = params.sampling_params;
645+
const llama_sampling_params & sparams = params.sparams;
644646

645647
printf("usage: %s [options]\n", argv[0]);
646648
printf("\n");
@@ -678,10 +680,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
678680
printf(" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)sparams.top_p);
679681
printf(" --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)sparams.tfs_z);
680682
printf(" --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)sparams.typical_p);
681-
printf(" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", sparams.repeat_last_n);
682-
printf(" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)sparams.repeat_penalty);
683-
printf(" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.presence_penalty);
684-
printf(" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.frequency_penalty);
683+
printf(" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", sparams.penalty_last_n);
684+
printf(" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)sparams.penalty_repeat);
685+
printf(" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_present);
686+
printf(" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_freq);
685687
printf(" --mirostat N use Mirostat sampling.\n");
686688
printf(" Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n");
687689
printf(" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", sparams.mirostat);
@@ -878,13 +880,13 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
878880
}
879881

880882
if (params.ignore_eos) {
881-
params.sampling_params.logit_bias[llama_token_eos(lctx)] = -INFINITY;
883+
params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
882884
}
883885

884886
{
885887
LOG("warming up the model with an empty run\n");
886888

887-
std::vector<llama_token> tmp = { llama_token_bos(lctx), llama_token_eos(lctx), };
889+
std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
888890
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
889891
llama_kv_cache_tokens_rm(lctx, -1, -1);
890892
llama_reset_timings(lctx);
@@ -939,7 +941,7 @@ std::string llama_token_to_piece(const struct llama_context * ctx, llama_token t
939941
}
940942

941943
std::string llama_detokenize_spm(llama_context * ctx, const std::vector<llama_token> & tokens) {
942-
const llama_token bos_id = llama_token_bos(ctx);
944+
const llama_token bos_id = llama_token_bos(llama_get_model(ctx));
943945

944946
std::string piece;
945947
std::string result;
@@ -1123,28 +1125,28 @@ std::string get_sortable_timestamp() {
11231125

11241126
void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const llama_context * lctx,
11251127
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc) {
1126-
const llama_sampling_params & sparams = params.sampling_params;
1128+
const llama_sampling_params & sparams = params.sparams;
11271129

11281130
fprintf(stream, "build_commit: %s\n", BUILD_COMMIT);
11291131
fprintf(stream, "build_number: %d\n", BUILD_NUMBER);
1130-
fprintf(stream, "cpu_has_arm_fma: %s\n", ggml_cpu_has_arm_fma() ? "true" : "false");
1131-
fprintf(stream, "cpu_has_avx: %s\n", ggml_cpu_has_avx() ? "true" : "false");
1132-
fprintf(stream, "cpu_has_avx2: %s\n", ggml_cpu_has_avx2() ? "true" : "false");
1133-
fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false");
1132+
fprintf(stream, "cpu_has_arm_fma: %s\n", ggml_cpu_has_arm_fma() ? "true" : "false");
1133+
fprintf(stream, "cpu_has_avx: %s\n", ggml_cpu_has_avx() ? "true" : "false");
1134+
fprintf(stream, "cpu_has_avx2: %s\n", ggml_cpu_has_avx2() ? "true" : "false");
1135+
fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false");
11341136
fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false");
11351137
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
1136-
fprintf(stream, "cpu_has_blas: %s\n", ggml_cpu_has_blas() ? "true" : "false");
1137-
fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false");
1138-
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
1139-
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
1140-
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
1141-
fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false");
1142-
fprintf(stream, "cpu_has_f16c: %s\n", ggml_cpu_has_f16c() ? "true" : "false");
1143-
fprintf(stream, "cpu_has_fp16_va: %s\n", ggml_cpu_has_fp16_va() ? "true" : "false");
1144-
fprintf(stream, "cpu_has_wasm_simd: %s\n", ggml_cpu_has_wasm_simd() ? "true" : "false");
1145-
fprintf(stream, "cpu_has_blas: %s\n", ggml_cpu_has_blas() ? "true" : "false");
1146-
fprintf(stream, "cpu_has_sse3: %s\n", ggml_cpu_has_sse3() ? "true" : "false");
1147-
fprintf(stream, "cpu_has_vsx: %s\n", ggml_cpu_has_vsx() ? "true" : "false");
1138+
fprintf(stream, "cpu_has_blas: %s\n", ggml_cpu_has_blas() ? "true" : "false");
1139+
fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false");
1140+
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
1141+
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
1142+
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
1143+
fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false");
1144+
fprintf(stream, "cpu_has_f16c: %s\n", ggml_cpu_has_f16c() ? "true" : "false");
1145+
fprintf(stream, "cpu_has_fp16_va: %s\n", ggml_cpu_has_fp16_va() ? "true" : "false");
1146+
fprintf(stream, "cpu_has_wasm_simd: %s\n", ggml_cpu_has_wasm_simd() ? "true" : "false");
1147+
fprintf(stream, "cpu_has_blas: %s\n", ggml_cpu_has_blas() ? "true" : "false");
1148+
fprintf(stream, "cpu_has_sse3: %s\n", ggml_cpu_has_sse3() ? "true" : "false");
1149+
fprintf(stream, "cpu_has_vsx: %s\n", ggml_cpu_has_vsx() ? "true" : "false");
11481150

11491151
#ifdef NDEBUG
11501152
fprintf(stream, "debug: false\n");
@@ -1178,13 +1180,13 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
11781180
fprintf(stream, "ctx_size: %d # default: 512\n", params.n_ctx);
11791181
fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false");
11801182
fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n");
1181-
fprintf(stream, "frequency_penalty: %f # default: 0.0 \n", sparams.frequency_penalty);
1182-
dump_string_yaml_multiline(stream, "grammar", params.grammar.c_str());
1183+
fprintf(stream, "frequency_penalty: %f # default: 0.0 \n", sparams.penalty_freq);
1184+
dump_string_yaml_multiline(stream, "grammar", sparams.grammar.c_str());
11831185
fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n");
11841186
fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false");
11851187
fprintf(stream, "hellaswag_tasks: %zu # default: 400\n", params.hellaswag_tasks);
11861188

1187-
const auto logit_bias_eos = sparams.logit_bias.find(llama_token_eos(lctx));
1189+
const auto logit_bias_eos = sparams.logit_bias.find(llama_token_eos(llama_get_model(lctx)));
11881190
const bool ignore_eos = logit_bias_eos != sparams.logit_bias.end() && logit_bias_eos->second == -INFINITY;
11891191
fprintf(stream, "ignore_eos: %s # default: false\n", ignore_eos ? "true" : "false");
11901192

@@ -1238,14 +1240,14 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
12381240
fprintf(stream, "numa: %s # default: false\n", params.numa ? "true" : "false");
12391241
fprintf(stream, "ppl_output_type: %d # default: 0\n", params.ppl_output_type);
12401242
fprintf(stream, "ppl_stride: %d # default: 0\n", params.ppl_stride);
1241-
fprintf(stream, "presence_penalty: %f # default: 0.0\n", sparams.presence_penalty);
1243+
fprintf(stream, "presence_penalty: %f # default: 0.0\n", sparams.penalty_present);
12421244
dump_string_yaml_multiline(stream, "prompt", params.prompt.c_str());
12431245
fprintf(stream, "prompt_cache: %s\n", params.path_prompt_cache.c_str());
12441246
fprintf(stream, "prompt_cache_all: %s # default: false\n", params.prompt_cache_all ? "true" : "false");
12451247
fprintf(stream, "prompt_cache_ro: %s # default: false\n", params.prompt_cache_ro ? "true" : "false");
12461248
dump_vector_int_yaml(stream, "prompt_tokens", prompt_tokens);
12471249
fprintf(stream, "random_prompt: %s # default: false\n", params.random_prompt ? "true" : "false");
1248-
fprintf(stream, "repeat_penalty: %f # default: 1.1\n", sparams.repeat_penalty);
1250+
fprintf(stream, "repeat_penalty: %f # default: 1.1\n", sparams.penalty_repeat);
12491251

12501252
fprintf(stream, "reverse_prompt:\n");
12511253
for (std::string ap : params.antiprompt) {

common/common.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ struct gpt_params {
6969
float mirostat_tau = 5.00f; // target entropy
7070
float mirostat_eta = 0.10f; // learning rate
7171
// // sampling parameters
72-
struct llama_sampling_params sampling_params;
72+
struct llama_sampling_params sparams;
7373

7474
std::string model = "models/7B/ggml-model-f16.gguf"; // model path
7575
std::string model_draft = ""; // draft model for speculative decoding
@@ -79,7 +79,6 @@ struct gpt_params {
7979
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
8080
std::string input_prefix = ""; // string to prefix user inputs with
8181
std::string input_suffix = ""; // string to suffix user inputs with
82-
std::string grammar = ""; // optional BNF-like grammar to constrain sampling
8382
std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
8483
std::string logdir = ""; // directory in which to save YAML log files
8584

common/grammar-parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ namespace grammar_parser {
399399
void print_grammar(FILE * file, const parse_state & state) {
400400
try {
401401
std::map<uint32_t, std::string> symbol_id_names;
402-
for (auto kv : state.symbol_ids) {
402+
for (const auto & kv : state.symbol_ids) {
403403
symbol_id_names[kv.second] = kv.first;
404404
}
405405
for (size_t i = 0, end = state.rules.size(); i < end; i++) {

common/log.h

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,22 +97,23 @@
9797
#define LOG_TEE_TARGET stderr
9898
#endif
9999

100+
// NOTE: currently disabled as it produces too many log files
100101
// Utility to obtain "pid" like unique process id and use it when creating log files.
101-
inline std::string log_get_pid()
102-
{
103-
static std::string pid;
104-
if (pid.empty())
105-
{
106-
// std::this_thread::get_id() is the most portable way of obtaining a "process id"
107-
// it's not the same as "pid" but is unique enough to solve multiple instances
108-
// trying to write to the same log.
109-
std::stringstream ss;
110-
ss << std::this_thread::get_id();
111-
pid = ss.str();
112-
}
113-
114-
return pid;
115-
}
102+
//inline std::string log_get_pid()
103+
//{
104+
// static std::string pid;
105+
// if (pid.empty())
106+
// {
107+
// // std::this_thread::get_id() is the most portable way of obtaining a "process id"
108+
// // it's not the same as "pid" but is unique enough to solve multiple instances
109+
// // trying to write to the same log.
110+
// std::stringstream ss;
111+
// ss << std::this_thread::get_id();
112+
// pid = ss.str();
113+
// }
114+
//
115+
// return pid;
116+
//}
116117

117118
// Utility function for generating log file names with unique id based on thread id.
118119
// invocation with log_filename_generator( "llama", "log" ) creates a string "llama.<number>.log"
@@ -126,8 +127,8 @@ inline std::string log_filename_generator_impl(const std::string & log_file_base
126127
std::stringstream buf;
127128

128129
buf << log_file_basename;
129-
buf << ".";
130-
buf << log_get_pid();
130+
//buf << ".";
131+
//buf << log_get_pid();
131132
buf << ".";
132133
buf << log_file_extension;
133134

0 commit comments

Comments (0)