Skip to content

Commit d059021

Browse files
Merge pull request #575 from janhq/update-dev-from-master-2026-06-30-01-12
Sync master with upstream release b9842
2 parents f653ae0 + 6f4f53f commit d059021

34 files changed

Lines changed: 4787 additions & 634 deletions

common/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,8 @@ add_library(${TARGET}
9494
peg-parser.h
9595
preset.cpp
9696
preset.h
97-
regex-partial.cpp
9897
reasoning-budget.cpp
9998
reasoning-budget.h
100-
regex-partial.h
10199
sampling.cpp
102100
sampling.h
103101
speculative.cpp

common/preset.cpp

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <fstream>
88
#include <sstream>
99
#include <filesystem>
10+
#include <regex>
1011

1112
static std::string rm_leading_dashes(const std::string & str) {
1213
size_t pos = 0;
@@ -16,6 +17,23 @@ static std::string rm_leading_dashes(const std::string & str) {
1617
return str.substr(pos);
1718
}
1819

20+
// Normalizes a preset tag to its canonical upper-case form.
//
// If the tag ends with a '-' or '.' separator followed by one or more
// characters from [A-Za-z0-9_], only that trailing segment is kept
// (e.g. "UD-q4_k_xl" -> "Q4_K_XL"). Otherwise the whole tag is used
// (e.g. "q4_k_m" -> "Q4_K_M"). The selected text is returned upper-cased.
static std::string canonical_tag(const std::string & tag) {
    static const std::regex re_tag("[-.]([A-Z0-9_]+)$", std::regex::icase);

    std::smatch m;
    // pick the trailing segment when present, else fall back to the full tag
    std::string canon = std::regex_search(tag, m, re_tag) ? m[1].str() : tag;

    for (char & c : canon) {
        // convert via unsigned char: std::toupper must not receive negative char values
        c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
    }
    return canon;
}
36+
1937
std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
2038
std::vector<std::string> args;
2139

@@ -270,11 +288,18 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co
270288

271289
for (auto section : ini_data) {
272290
common_preset preset;
273-
if (section.first.empty()) {
274-
preset.name = COMMON_PRESET_DEFAULT_NAME;
275-
} else {
276-
preset.name = section.first;
291+
std::string section_name = section.first.empty() ? std::string(COMMON_PRESET_DEFAULT_NAME) : section.first;
292+
if (section_name != "*" && section_name != COMMON_PRESET_DEFAULT_NAME) {
293+
auto colon_idx = section_name.rfind(':');
294+
if (colon_idx != std::string::npos) {
295+
std::string tag = section_name.substr(colon_idx + 1);
296+
std::string canon_tag = canonical_tag(tag);
297+
if (canon_tag != tag) {
298+
section_name = section_name.substr(0, colon_idx + 1) + canon_tag;
299+
}
300+
}
277301
}
302+
preset.name = section_name;
278303
LOG_DBG("loading preset: %s\n", preset.name.c_str());
279304
for (const auto & [key, value] : section.second) {
280305
if (key == "version") {

common/regex-partial.cpp

Lines changed: 0 additions & 204 deletions
This file was deleted.

common/regex-partial.h

Lines changed: 0 additions & 56 deletions
This file was deleted.

conversion/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"DeepseekV3ForCausalLM": "deepseek",
5252
"DeepseekV32ForCausalLM": "deepseek",
5353
"DFlashDraftModel": "qwen",
54+
"DeepseekV4ForCausalLM": "deepseek",
5455
"DistilBertForMaskedLM": "bert",
5556
"DistilBertForSequenceClassification": "bert",
5657
"DistilBertModel": "bert",

conversion/base.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1273,7 +1273,7 @@ def set_gguf_parameters(self):
12731273
if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
12741274
self.gguf_writer.add_layer_norm_eps(f_norm_eps)
12751275
logger.info(f"gguf: layer norm epsilon = {f_norm_eps}")
1276-
if (n_experts := self.find_hparam(["num_local_experts", "num_experts"], optional=True)) is not None:
1276+
if (n_experts := self.find_hparam(["num_local_experts", "num_experts", "n_routed_experts"], optional=True)) is not None:
12771277
self.gguf_writer.add_expert_count(n_experts)
12781278
logger.info(f"gguf: expert count = {n_experts}")
12791279
if (n_experts_used := self.find_hparam(["num_experts_per_tok", "num_experts_per_token", "top_k_experts"], optional=True)) is not None:
@@ -1291,6 +1291,8 @@ def set_gguf_parameters(self):
12911291
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
12921292
elif score_func == "softmax":
12931293
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
1294+
elif score_func == "sqrtsoftplus":
1295+
self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SQRTSOFTPLUS)
12941296
else:
12951297
raise ValueError(f"Unsupported expert score gating function value: {score_func}")
12961298
logger.info(f"gguf: expert score gating function = {score_func}")
@@ -2600,6 +2602,17 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):
26002602
return cls._wrap_fn(func)(*args, **kwargs)
26012603

26022604

2605+
# Register the "F8_E8M0" dtype string with LazyTorchTensor.
if hasattr(torch, "float8_e8m0fnu"):
    # This torch build exposes the dtype: map it to np.uint8 in both the
    # regular and the byteswap dtype maps, and resolve "F8_E8M0" to it.
    _torch_float8_e8m0 = torch.float8_e8m0fnu
    LazyTorchTensor._dtype_map[_torch_float8_e8m0] = np.uint8
    LazyTorchTensor._dtype_byteswap_map[_torch_float8_e8m0] = np.uint8
    LazyTorchTensor._dtype_str_map["F8_E8M0"] = _torch_float8_e8m0
else:
    # Older torch builds do not expose F8_E8M0. Keep the raw bytes so callers
    # that know the format can decode them explicitly.
    LazyTorchTensor._dtype_str_map["F8_E8M0"] = torch.uint8
2614+
2615+
26032616
def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> str:
26042617
# TODO @ngxson : this won't work correctly if the model has both audio & vision encoders
26052618
# maybe we should fallback to text model's arch in that case, since not many models have both

0 commit comments

Comments
 (0)