Skip to content

Commit 4cba3f7

Browse files
committed
fix gemma4 streaming output
1 parent 0b5ffce commit 4cba3f7

1 file changed

Lines changed: 32 additions & 9 deletions

File tree

packages/worker/src/index.ts

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ async function streamAiResponse(
8181
if (data.response) {
8282
fullResponse += data.response;
8383

84-
if (Date.now() - lastUpdate > 5000) {
84+
if (Date.now() - lastUpdate > 1000) {
8585
await bot.streamReply(await markdownToHtml(fullResponse), draft_id, 'HTML');
8686
lastUpdate = Date.now();
8787
}
@@ -114,13 +114,15 @@ async function streamAiResponseGemma(
114114
const draft_id = Math.floor(Math.random() * 1000000) + 1;
115115
let fullResponse = '';
116116
let lastUpdate = 0;
117+
let buffer = '';
117118

118119
for (;;) {
119120
const { done, value } = await reader.read();
120121
if (done) break;
121122

122-
const chunk = decoder.decode(value);
123-
const lines = chunk.split('\n');
123+
buffer += decoder.decode(value, { stream: true });
124+
let lines = buffer.split('\n');
125+
buffer = lines.pop() || '';
124126

125127
for (const line of lines) {
126128
const trimmedLine = line.trim();
@@ -131,13 +133,13 @@ async function streamAiResponseGemma(
131133
const data = JSON.parse(trimmedLine.slice(6));
132134

133135
// Use the new OpenAI-style path or the legacy response key
134-
const content = data.choices?.[0]?.delta?.content || data.response || '';
136+
const content = data.choices?.[0]?.delta?.content ?? data.response ?? '';
135137

136138
if (content) {
137139
fullResponse += content;
138140

139-
// Throttle updates to Telegram (5 seconds is sensible to avoid rate limits)
140-
if (Date.now() - lastUpdate > 5000) {
141+
// Throttle updates to Telegram (1000ms is sensible to avoid rate limits)
142+
if (Date.now() - lastUpdate > 1000) {
141143
await bot.streamReply(await markdownToHtml(fullResponse), draft_id, 'HTML');
142144
lastUpdate = Date.now();
143145
}
@@ -151,7 +153,28 @@ async function streamAiResponseGemma(
151153
}
152154

153155
// Final update to ensure the message is complete in Telegram
154-
await bot.streamReply(await markdownToHtml(fullResponse), draft_id, 'HTML');
156+
try {
157+
const timeToWait = Math.max(0, 1000 - (Date.now() - lastUpdate));
158+
if (timeToWait > 0) {
159+
await new Promise(resolve => setTimeout(resolve, timeToWait));
160+
}
161+
162+
// Also process any leftover buffer just in case
163+
if (buffer.trim()) {
164+
const trimmedLine = buffer.trim();
165+
if (trimmedLine.startsWith('data: ') && trimmedLine !== 'data: [DONE]') {
166+
try {
167+
const data = JSON.parse(trimmedLine.slice(6));
168+
const content = data.choices?.[0]?.delta?.content ?? data.response ?? '';
169+
if (content) fullResponse += content;
170+
} catch(e) {}
171+
}
172+
}
173+
174+
await bot.streamReply(await markdownToHtml(fullResponse), draft_id, 'HTML');
175+
} catch (e) {
176+
console.error('Final streamReply failed:', e);
177+
}
155178

156179
return fullResponse;
157180
}
@@ -255,7 +278,7 @@ export default {
255278

256279
try {
257280
console.log('Processing text message:', prompt);
258-
const response = await streamAiResponseGemma(bot, env, AI_MODELS.GEMMA, messages, 131072);
281+
const response = await streamAiResponseGemma(bot, env, AI_MODELS.GEMMA, messages, 50000);
259282

260283
if (response) {
261284
await bot.reply(await markdownToHtml(response), 'HTML');
@@ -345,7 +368,7 @@ export default {
345368

346369
try {
347370
// @ts-expect-error broken bindings
348-
const response = await env.AI.run(AI_MODELS.LLAMA, { messages, max_tokens: 100 });
371+
const response = await env.AI.run(AI_MODELS.LLAMA, { messages, max_completion_tokens: 100 });
349372

350373
// @ts-expect-error broken bindings
351374
if ('response' in response) {

0 commit comments

Comments
 (0)