Skip to content

Commit 9b3bb7a

Browse files
Merge pull request #570 from janhq/update-dev-from-master-2026-06-25-01-12
Sync master with upstream release b9784
2 parents 5577927 + 8be759e commit 9b3bb7a

138 files changed

Lines changed: 10848 additions & 10636 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
142142
- [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct)
143143
- [X] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview)
144144
- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
145-
- [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38)
145+
- [x] [Liquid LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2)
146+
- [x] [Liquid LFM2.5 models](https://huggingface.co/collections/LiquidAI/lfm25)
147+
- [x] [Liquid Nanos](https://huggingface.co/collections/LiquidAI/liquid-nanos)
146148
- [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7)
147149
- [x] [BailingMoeV2 (Ring/Ling 2.0) models](https://huggingface.co/collections/inclusionAI/ling-v2-68bf1dd2fc34c306c1fa6f86)
148150
- [x] [Mellum models](https://huggingface.co/JetBrains/models?search=mellum)

common/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,6 @@ add_library(${TARGET}
8080
http.h
8181
imatrix-loader.cpp
8282
imatrix-loader.h
83-
json-partial.cpp
84-
json-partial.h
8583
json-schema-to-grammar.cpp
8684
llguidance.cpp
8785
log.cpp

common/json-partial.cpp

Lines changed: 0 additions & 324 deletions
This file was deleted.

common/json-partial.h

Lines changed: 0 additions & 39 deletions
This file was deleted.

conversion/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
"DbrxForCausalLM": "dbrx",
4747
"DeciLMForCausalLM": "deci",
4848
"DeepseekForCausalLM": "deepseek",
49+
"DeepseekOCRForCausalLM": "deepseek",
4950
"DeepseekV2ForCausalLM": "deepseek",
5051
"DeepseekV3ForCausalLM": "deepseek",
5152
"DeepseekV32ForCausalLM": "deepseek",
@@ -124,6 +125,7 @@
124125
"LLaDAModelLM": "llada",
125126
"LLaMAForCausalLM": "llama",
126127
"Lfm25AudioTokenizer": "lfm2",
128+
"Lfm2BidirectionalModel": "lfm2",
127129
"Lfm2ForCausalLM": "lfm2",
128130
"Lfm2Model": "lfm2",
129131
"Lfm2MoeForCausalLM": "lfm2",
@@ -232,6 +234,7 @@
232234
"UMT5ForConditionalGeneration": "t5",
233235
"UMT5Model": "t5",
234236
"UltravoxModel": "ultravox",
237+
"UnlimitedOCRForCausalLM": "deepseek",
235238
"VLlama3ForCausalLM": "llama",
236239
"VoxtralForConditionalGeneration": "llama",
237240
"WavTokenizerDec": "wavtokenizer",
@@ -298,6 +301,7 @@
298301
"StepVLForConditionalGeneration": "step3",
299302
"Step3p7ForConditionalGeneration": "step3",
300303
"UltravoxModel": "ultravox",
304+
"UnlimitedOCRForCausalLM": "deepseek",
301305
"VoxtralForConditionalGeneration": "ultravox",
302306
"YoutuVLForConditionalGeneration": "youtuvl",
303307
}

conversion/deepseek.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from .qwen import QwenModel
1515

1616

17-
@ModelBase.register("DeepseekOCRForCausalLM")
17+
@ModelBase.register("DeepseekOCRForCausalLM", "UnlimitedOCRForCausalLM")
1818
class DeepseekOCRVisionModel(MmprojModel):
1919
def __init__(self, *args, **kwargs):
2020
super().__init__(*args, **kwargs)
@@ -205,6 +205,8 @@ def prepare_tensors(self):
205205
@ModelBase.register(
206206
"DeepseekV2ForCausalLM",
207207
"DeepseekV3ForCausalLM",
208+
"DeepseekOCRForCausalLM",
209+
"UnlimitedOCRForCausalLM",
208210
"KimiVLForConditionalGeneration",
209211
"KimiK25ForConditionalGeneration",
210212
"YoutuForCausalLM",
@@ -224,7 +226,7 @@ def __init__(self, *args, **kwargs):
224226
self.origin_hf_arch = hparams.get('architectures', [None])[0]
225227

226228
# special handling for Deepseek OCR
227-
if self.origin_hf_arch in ("DeepseekOCRForCausalLM", "DeepseekOCR2ForCausalLM"):
229+
if self.origin_hf_arch in ("DeepseekOCRForCausalLM", "DeepseekOCR2ForCausalLM", "UnlimitedOCRForCausalLM"):
228230
self.model_arch = gguf.MODEL_ARCH.DEEPSEEK2OCR
229231
self.gguf_writer.arch = gguf.MODEL_ARCH_NAMES[self.model_arch]
230232
self.gguf_writer.add_architecture()
@@ -350,6 +352,12 @@ def set_gguf_parameters(self):
350352

351353
self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
352354

355+
# Unlimited-OCR sliding window; written for metadata, the decoder ignores it (full MHA)
356+
if is_ocr:
357+
sliding_window = hparams.get("sliding_window_size") or hparams.get("sliding_window")
358+
if sliding_window:
359+
self.gguf_writer.add_sliding_window(sliding_window)
360+
353361
if (rope_mscale_all := self.rope_parameters.get("mscale_all_dim")) is not None:
354362
# [TAG_DEEPSEEK2_YARN_LOG_MUL_FIX]
355363
# note: for legacy reasons, this is not consistent with the other usages of self.gguf_writer.add_rope_scaling_yarn_log_mul

conversion/lfm2.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,22 +64,29 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
6464
yield from super().modify_tensors(data_torch, name, bid)
6565

6666

67-
@ModelBase.register("Lfm2Model")
67+
@ModelBase.register("Lfm2Model", "Lfm2BidirectionalModel")
6868
class LFM2ColBertModel(LFM2Model):
6969
model_arch = gguf.MODEL_ARCH.LFM2
7070
dense_tensor_name = "dense_2"
7171

72+
def set_gguf_parameters(self):
73+
super().set_gguf_parameters()
74+
if self.hf_arch == "Lfm2BidirectionalModel":
75+
self.gguf_writer.add_causal_attention(False)
76+
self._try_set_pooling_type()
77+
7278
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
7379
if not name.startswith(self.dense_tensor_name):
7480
name = "model." + name
7581

7682
yield from super().modify_tensors(data_torch, name, bid)
7783

7884
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
79-
# dense tensor is stored in a separate safetensors file
85+
# optional dense tensor is stored in a separate safetensors file
8086
from safetensors.torch import load_file
8187
tensors_file = self.dir_model / "1_Dense" / "model.safetensors"
82-
assert tensors_file.is_file()
88+
if not tensors_file.is_file():
89+
return
8390
tensor = load_file(tensors_file)["linear.weight"]
8491
self.gguf_writer.add_embedding_length_out(tensor.shape[0])
8592
yield f"{self.dense_tensor_name}.weight", tensor.clone()

docs/backend/snapdragon/CMakeUserPresets.json

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
"GGML_LLAMAFILE": "OFF",
2525
"GGML_OPENCL": "ON",
2626
"GGML_HEXAGON": "ON",
27-
"GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE": "128",
2827
"LLAMA_OPENSSL": "OFF"
2928
}
3029
},
@@ -47,7 +46,6 @@
4746
"GGML_LLAMAFILE": "OFF",
4847
"GGML_OPENCL": "ON",
4948
"GGML_HEXAGON": "ON",
50-
"GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE": "128",
5149
"LLAMA_OPENSSL": "OFF"
5250
}
5351
},
@@ -73,7 +71,6 @@
7371
"GGML_LLAMAFILE": "OFF",
7472
"GGML_OPENCL": "OFF",
7573
"GGML_HEXAGON": "ON",
76-
"GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE": "128",
7774
"LLAMA_OPENSSL": "OFF"
7875
}
7976
},

ggml/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,6 @@ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
266266
"ggml: OpenCL API version to target")
267267

268268
option(GGML_HEXAGON "ggml: enable Hexagon backend" OFF)
269-
set(GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE 128 CACHE STRING "ggml: quantize group size (32, 64, or 128)")
270269

271270
# toolchain for vulkan-shaders-gen
272271
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")

ggml/src/ggml-hexagon/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ include(ExternalProject)
2525
option(GGML_HEXAGON_HTP_DEBUG "ggml-hexagon: enable HTP debug output" OFF)
2626
option(GGML_HEXAGON_FA_EXP2_HF "ggml-hexagon: use FP16 exp2 polynomial in FA softmax instead of F32 exp round-trip" OFF)
2727
set(GGML_HEXAGON_HTP_CERT "$ENV{HEXAGON_HTP_CERT}" CACHE PATH "ggml-hexagon: enable HTP library signing using certificate")
28-
set(GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE 128 CACHE STRING "ggml-hexagon: quantize group size (32, 64, or 128)")
2928

3029
add_library(htp_iface OBJECT
3130
${CMAKE_CURRENT_BINARY_DIR}/htp_iface_stub.c)
@@ -72,15 +71,12 @@ function(build_htp_skel V)
7271
-DHEXAGON_SDK_ROOT=${HEXAGON_SDK_ROOT}
7372
-DHEXAGON_TOOLS_ROOT=${HEXAGON_TOOLS_ROOT}
7473
-DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG}
75-
-DGGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE=${GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE}
7674
-DDSP_VERSION=${V}
7775
-DPREBUILT_LIB_DIR="toolv19_${V}")
7876
list(APPEND HTP_SKELS ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-${V}.so)
7977
set(HTP_SKELS ${HTP_SKELS} PARENT_SCOPE)
8078
endfunction()
8179

82-
build_htp_skel(v68)
83-
build_htp_skel(v69)
8480
build_htp_skel(v73)
8581
build_htp_skel(v75)
8682
build_htp_skel(v79)

0 commit comments

Comments
 (0)