@@ -188,13 +188,17 @@ impl LLM {
188188 // Use the model's embedded chat template when llama.cpp can detect it.
189189 // Falls back to hardcoded Gemma format when detection fails (e.g. Gemma 4
190190 // until llama-cpp-sys picks up the upstream Gemma 4 template detection fix).
191- let llm_input = self . model
191+ let llm_input = match self . model
192192 . chat_template ( None )
193193 . ok ( )
194194 . and_then ( |tmpl| self . model . apply_chat_template ( & tmpl, & messages, true ) . ok ( ) )
195- . unwrap_or_else ( || format ! (
196- "<start_of_turn>user\n {system}\n \n {user}<end_of_turn>\n <start_of_turn>model\n "
197- ) ) ;
195+ {
196+ Some ( s) => s,
197+ None => {
198+ eprintln ! ( "ltengine: apply_chat_template failed: using hardcoded Gemma format" ) ;
199+ format ! ( "<start_of_turn>user\n {system}\n \n {user}<end_of_turn>\n <start_of_turn>model\n " )
200+ }
201+ } ;
198202
199203 // BOS is not added by apply_chat_template — str_to_token handles it.
200204 let tokens_list = self . model
@@ -278,6 +282,17 @@ impl LLMContext<'_>{
278282 self . ctx . decode ( & mut batch) . with_context ( || "Failed to eval" ) ?;
279283 }
280284
285+ // Gemma 4 thinking mode emits thinking content before the actual response in two forms:
286+ // 1. <|channel>thought\n...<channel|>answer (full block with closing tag)
287+ // 2. <|channel>thought answer (no closing tag, space-separated)
288+ let output = if let Some ( pos) = output. find ( "<channel|>" ) {
289+ output[ pos + "<channel|>" . len ( ) ..] . to_owned ( )
290+ } else if let Some ( rest) = output. strip_prefix ( "<|channel>thought" ) {
291+ rest. trim_start_matches ( [ '\n' , ' ' ] ) . to_owned ( )
292+ } else {
293+ output
294+ } ;
295+
281296 // Gemma may emit <end_of_turn> as literal text when it cannot translate
282297 // (e.g. unsupported language/format combination) instead of the special
283298 // EOG token caught above. Strip it and treat empty output as an error.
@@ -286,6 +301,7 @@ impl LLMContext<'_>{
286301 if output. is_empty ( ) {
287302 return Err ( anyhow:: anyhow!( "Model produced empty output" ) ) ;
288303 }
304+
289305 Ok ( output)
290306 }
291307}
0 commit comments