Skip to content

Commit 376aaf2

Browse files
committed
Merge branch 'upstream' into concedo_experimental
2 parents 6c937c0 + 9c69907 commit 376aaf2

5 files changed

Lines changed: 46 additions & 11 deletions

File tree

common/download.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -600,9 +600,12 @@ static hf_cache::hf_file find_best_model(const hf_cache::hf_files & files,
600600
}
601601
}
602602

603-
for (const auto & f : files) {
604-
if (gguf_filename_is_model(f.path)) {
605-
return f;
603+
// fallback to first available model only if tag is empty
604+
if (tag.empty()) {
605+
for (const auto & f : files) {
606+
if (gguf_filename_is_model(f.path)) {
607+
return f;
608+
}
606609
}
607610
}
608611

src/llama-model.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1394,6 +1394,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
13941394
ml.get_key(LLM_KV_EMBEDDING_LENGTH_PER_LAYER, hparams.n_embd_per_layer);
13951395
ml.get_key(LLM_KV_ATTENTION_KEY_LENGTH_SWA, hparams.n_embd_head_k_swa);
13961396
ml.get_key(LLM_KV_ATTENTION_VALUE_LENGTH_SWA, hparams.n_embd_head_v_swa);
1397+
ml.get_key(LLM_KV_FINAL_LOGIT_SOFTCAPPING, hparams.f_final_logit_softcapping, false);
13971398

13981399
switch (hparams.n_layer) {
13991400
case 35: type = LLM_TYPE_E2B; break;

src/unicode.cpp

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -753,6 +753,35 @@ static std::vector<size_t> unicode_regex_split_custom_afmoe(const std::string &
753753
return bpe_offsets;
754754
}
755755

756+
// regex: [^\n]+|[\n]+
757+
// splits text into runs of non-newline characters and runs of newline characters
758+
static std::vector<size_t> unicode_regex_split_custom_newlines(const std::string & text, const std::vector<size_t> & offsets) {
759+
std::vector<size_t> bpe_offsets;
760+
bpe_offsets.reserve(offsets.size());
761+
762+
const auto cpts = unicode_cpts_from_utf8(text);
763+
764+
size_t start = 0;
765+
for (auto offset : offsets) {
766+
const size_t offset_ini = start;
767+
const size_t offset_end = start + offset;
768+
assert(offset_end <= cpts.size());
769+
start = offset_end;
770+
771+
size_t pos = offset_ini;
772+
while (pos < offset_end) {
773+
const bool is_newline = (cpts[pos] == '\n');
774+
const size_t run_start = pos;
775+
while (pos < offset_end && (cpts[pos] == '\n') == is_newline) {
776+
pos++;
777+
}
778+
bpe_offsets.push_back(pos - run_start);
779+
}
780+
}
781+
782+
return bpe_offsets;
783+
}
784+
756785
static std::vector<size_t> unicode_regex_split_custom(const std::string & text, const std::string & regex_expr, const std::vector<size_t> & offsets) {
757786
std::vector<size_t> bpe_offsets;
758787

@@ -769,6 +798,8 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
769798
} else if (regex_expr == "\\p{AFMoE_digits}") {
770799
// AFMOE digit pattern - use custom implementation for proper splitting
771800
bpe_offsets = unicode_regex_split_custom_afmoe(text, offsets);
801+
} else if (regex_expr == "[^\\n]+|[\\n]+") {
802+
bpe_offsets = unicode_regex_split_custom_newlines(text, offsets);
772803
} else if (regex_expr == "\\d{1,3}(?=(?:\\d{3})*\\b)") {
773804
// tiny_aya digit grouping pattern from tokenizer.json:
774805
// {"type": "Split", "pattern": {"Regex": "\\d{1,3}(?=(?:\\d{3})*\\b)"}, "behavior": "Isolated"}

tools/server/server-context.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -155,8 +155,8 @@ struct server_slot {
155155
int64_t t_start_process_prompt;
156156
int64_t t_start_generation;
157157

158-
double t_prompt_processing; // ms
159-
double t_token_generation; // ms
158+
double t_prompt_processing = 0.0; // ms
159+
double t_token_generation = 0.0; // ms
160160

161161
std::function<void(int /* id_slot */)> callback_on_release;
162162

tools/server/server-task.h

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -261,14 +261,14 @@ struct result_timings {
261261
int32_t cache_n = -1;
262262

263263
int32_t prompt_n = -1;
264-
double prompt_ms;
265-
double prompt_per_token_ms;
266-
double prompt_per_second;
264+
double prompt_ms = 0.0;
265+
double prompt_per_token_ms = 0.0;
266+
double prompt_per_second = 0.0;
267267

268268
int32_t predicted_n = -1;
269-
double predicted_ms;
270-
double predicted_per_token_ms;
271-
double predicted_per_second;
269+
double predicted_ms = 0.0;
270+
double predicted_per_token_ms = 0.0;
271+
double predicted_per_second = 0.0;
272272

273273
// Optional speculative metrics - only included when > 0
274274
int32_t draft_n = 0;

0 commit comments

Comments
 (0)