@@ -43,7 +43,8 @@ type ChatCommand = {
4343 repeatPresencePenalty ?: number ,
4444 maxTokens : number ,
4545 noHistory : boolean ,
46- environmentFunctions : boolean
46+ environmentFunctions : boolean ,
47+ printTimings : boolean
4748} ;
4849
4950export const ChatCommand : CommandModule < object , ChatCommand > = {
@@ -197,20 +198,27 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
197198 default : false ,
198199 description : "Provide access to environment functions like `getDate` and `getTime`" ,
199200 group : "Optional:"
201+ } )
202+ . option ( "printTimings" , {
203+ alias : "pt" ,
204+ type : "boolean" ,
205+ default : false ,
206+ description : "Print llama.cpp timings after each response" ,
207+ group : "Optional:"
200208 } ) ;
201209 } ,
202210 async handler ( {
203211 model, systemInfo, systemPrompt, prompt, wrapper, contextSize,
204212 grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
205213 gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
206214 repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory,
207- environmentFunctions
215+ environmentFunctions, printTimings
208216 } ) {
209217 try {
210218 await RunChat ( {
211219 model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
212220 topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
213- repeatPresencePenalty, maxTokens, noHistory, environmentFunctions
221+ repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, printTimings
214222 } ) ;
215223 } catch ( err ) {
216224 console . error ( err ) ;
@@ -223,7 +231,8 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
223231async function RunChat ( {
224232 model : modelArg , systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar : grammarArg ,
225233 jsonSchemaGrammarFile : jsonSchemaGrammarFilePath , threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
226- penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions
234+ penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions,
235+ printTimings
227236} : ChatCommand ) {
228237 const { LlamaChatSession} = await import ( "../../llamaEvaluator/LlamaChatSession/LlamaChatSession.js" ) ;
229238 const { LlamaModel} = await import ( "../../llamaEvaluator/LlamaModel.js" ) ;
@@ -370,6 +379,9 @@ async function RunChat({
370379 } ) ;
371380 process . stdout . write ( endColor ) ;
372381 console . log ( ) ;
382+
383+ if ( printTimings )
384+ context . printTimings ( ) ;
373385 }
374386}
375387
0 commit comments