@@ -81,7 +81,7 @@ async function streamAiResponse(
8181 if ( data . response ) {
8282 fullResponse += data . response ;
8383
84- if ( Date . now ( ) - lastUpdate > 5000 ) {
84+ if ( Date . now ( ) - lastUpdate > 1000 ) {
8585 await bot . streamReply ( await markdownToHtml ( fullResponse ) , draft_id , 'HTML' ) ;
8686 lastUpdate = Date . now ( ) ;
8787 }
@@ -114,13 +114,15 @@ async function streamAiResponseGemma(
114114 const draft_id = Math . floor ( Math . random ( ) * 1000000 ) + 1 ;
115115 let fullResponse = '' ;
116116 let lastUpdate = 0 ;
117+ let buffer = '' ;
117118
118119 for ( ; ; ) {
119120 const { done, value } = await reader . read ( ) ;
120121 if ( done ) break ;
121122
122- const chunk = decoder . decode ( value ) ;
123- const lines = chunk . split ( '\n' ) ;
123+ buffer += decoder . decode ( value , { stream : true } ) ;
124+ let lines = buffer . split ( '\n' ) ;
125+ buffer = lines . pop ( ) || '' ;
124126
125127 for ( const line of lines ) {
126128 const trimmedLine = line . trim ( ) ;
@@ -131,13 +133,13 @@ async function streamAiResponseGemma(
131133 const data = JSON . parse ( trimmedLine . slice ( 6 ) ) ;
132134
133135 // Use the new OpenAI-style path or the legacy response key
134- const content = data . choices ?. [ 0 ] ?. delta ?. content || data . response || '' ;
136+ const content = data . choices ?. [ 0 ] ?. delta ?. content ?? data . response ?? '' ;
135137
136138 if ( content ) {
137139 fullResponse += content ;
138140
139- // Throttle updates to Telegram (5 seconds is sensible to avoid rate limits)
140- if ( Date . now ( ) - lastUpdate > 5000 ) {
141+ // Throttle updates to Telegram (1000ms is sensible to avoid rate limits)
142+ if ( Date . now ( ) - lastUpdate > 1000 ) {
141143 await bot . streamReply ( await markdownToHtml ( fullResponse ) , draft_id , 'HTML' ) ;
142144 lastUpdate = Date . now ( ) ;
143145 }
@@ -151,7 +153,28 @@ async function streamAiResponseGemma(
151153 }
152154
153155 // Final update to ensure the message is complete in Telegram
154- await bot . streamReply ( await markdownToHtml ( fullResponse ) , draft_id , 'HTML' ) ;
156+ try {
157+ const timeToWait = Math . max ( 0 , 1000 - ( Date . now ( ) - lastUpdate ) ) ;
158+ if ( timeToWait > 0 ) {
159+ await new Promise ( resolve => setTimeout ( resolve , timeToWait ) ) ;
160+ }
161+
162+ // Also process any leftover buffer just in case
163+ if ( buffer . trim ( ) ) {
164+ const trimmedLine = buffer . trim ( ) ;
165+ if ( trimmedLine . startsWith ( 'data: ' ) && trimmedLine !== 'data: [DONE]' ) {
166+ try {
167+ const data = JSON . parse ( trimmedLine . slice ( 6 ) ) ;
168+ const content = data . choices ?. [ 0 ] ?. delta ?. content ?? data . response ?? '' ;
169+ if ( content ) fullResponse += content ;
170+ } catch ( e ) { }
171+ }
172+ }
173+
174+ await bot . streamReply ( await markdownToHtml ( fullResponse ) , draft_id , 'HTML' ) ;
175+ } catch ( e ) {
176+ console . error ( 'Final streamReply failed:' , e ) ;
177+ }
155178
156179 return fullResponse ;
157180}
@@ -255,7 +278,7 @@ export default {
255278
256279 try {
257280 console . log ( 'Processing text message:' , prompt ) ;
258- const response = await streamAiResponseGemma ( bot , env , AI_MODELS . GEMMA , messages , 131072 ) ;
281+ const response = await streamAiResponseGemma ( bot , env , AI_MODELS . GEMMA , messages , 50000 ) ;
259282
260283 if ( response ) {
261284 await bot . reply ( await markdownToHtml ( response ) , 'HTML' ) ;
@@ -345,7 +368,7 @@ export default {
345368
346369 try {
347370 // @ts -expect-error broken bindings
348- const response = await env . AI . run ( AI_MODELS . LLAMA , { messages, max_tokens : 100 } ) ;
371+ const response = await env . AI . run ( AI_MODELS . LLAMA , { messages, max_completion_tokens : 100 } ) ;
349372
350373 // @ts -expect-error broken bindings
351374 if ( 'response' in response ) {
0 commit comments