Skip to content

Commit a3f1450

Browse files
Introduce DeepSeekR1Qwen model and integrate with Qwen2ModelLoader
1 parent 3aa399b commit a3f1450

File tree

3 files changed

+33
-2
lines changed

3 files changed

+33
-2
lines changed

src/main/java/org/beehive/gpullama3/model/format/Qwen3ChatFormat.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,12 @@ public List<Integer> encodeMessage(Message message) {
101101

102102
@Override
103103
public int getBeginOfText() {
104-
return beginOfText;
104+
if (beginOfText == -1) {
105+
// deepseek-r1
106+
return startHeader;
107+
} else {
108+
return beginOfText;
109+
}
105110
}
106111

107112
@Override

src/main/java/org/beehive/gpullama3/model/loader/Qwen2ModelLoader.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.beehive.gpullama3.inference.weights.tornado.Qwen2TornadoWeights;
1313
import org.beehive.gpullama3.model.format.ChatFormat;
1414
import org.beehive.gpullama3.model.format.ChatFormat.ChatTokens;
15+
import org.beehive.gpullama3.model.qwen2.DeepSeekR1Qwen;
1516
import org.beehive.gpullama3.model.qwen2.Qwen2;
1617
import org.beehive.gpullama3.model.qwen2.Qwen2Configuration;
1718
import org.beehive.gpullama3.tokenizer.Qwen3Tokenizer;
@@ -85,7 +86,9 @@ protected Qwen2 createModel(Qwen2Configuration config, Tokenizer tokenizer, Weig
8586
// Qwen2.5-Coder uses <|endoftext|> as stop-token.
8687
ChatTokens chatTokens = isDeepSeekR1DistillQwen ? new ChatTokens("<|begin▁of▁sentence|>", "", "", "<|end▁of▁sentence|>", "")
8788
: new ChatTokens("<|im_start|>", "<|im_end|>", "", "<|end_of_text|>", "<|endoftext|>");
88-
return new Qwen2(config, tokenizer, weights, ChatFormat.create(tokenizer, chatTokens));
89+
return isDeepSeekR1DistillQwen
90+
? new DeepSeekR1Qwen(config, tokenizer, weights, ChatFormat.create(tokenizer, chatTokens))
91+
: new Qwen2(config, tokenizer, weights, ChatFormat.create(tokenizer, chatTokens));
8992
}
9093
// @formatter:on
9194

src/main/java/org/beehive/gpullama3/model/qwen2/DeepSeekR1Qwen.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package org.beehive.gpullama3.model.qwen2;
2+
3+
import org.beehive.gpullama3.inference.weights.Weights;
4+
import org.beehive.gpullama3.model.ModelType;
5+
import org.beehive.gpullama3.model.format.ChatFormat;
6+
import org.beehive.gpullama3.tokenizer.Tokenizer;
7+
8+
public class DeepSeekR1Qwen extends Qwen2 {
9+
10+
public DeepSeekR1Qwen(Qwen2Configuration configuration, Tokenizer tokenizer, Weights weights, ChatFormat chatFormat) {
11+
super(configuration, tokenizer, weights, chatFormat);
12+
}
13+
14+
@Override
15+
public ModelType getModelType() {
16+
return ModelType.DEEPSEEK_R1_DISTILL_QWEN;
17+
}
18+
19+
@Override
20+
public boolean shouldAddBeginOfText() {
21+
return true;
22+
}
23+
}

0 commit comments

Comments
 (0)