@@ -112,7 +112,7 @@ async def chat_completion(
     prompt: str,
     system_prompt: str = "",
     temperature: float = 0.7,
-    max_tokens: int = 1024
+    max_tokens: int = 2048
 ) -> str:
     """Generate a chat completion from the current LM Studio model.

@@ -171,7 +171,7 @@ async def chat_completion(
 async def text_completion(
     prompt: str,
     temperature: float = 0.7,
-    max_tokens: int = 1024,
+    max_tokens: int = 2048,
     stop_sequences: Optional[List[str]] = None
 ) -> str:
     """Generate a raw text completion (non-chat format) from LM Studio.
@@ -182,7 +182,7 @@ async def text_completion(
     Args:
         prompt: The text prompt to complete
         temperature: Controls randomness (0.0 to 2.0, default 0.7)
-        max_tokens: Maximum number of tokens to generate (default 1024)
+        max_tokens: Maximum number of tokens to generate (default 2048)
         stop_sequences: Optional list of sequences where generation will stop

     Returns:
@@ -379,6 +379,211 @@ async def create_response(
         return json.dumps({"error": f"Unexpected error: {str(e)}"})


+@mcp.tool()
+async def start_conversation(
+    system_prompt: str,
+    first_message: str,
+    temperature: float = 0.7,
+    max_tokens: int = 2048,
+    model: Optional[str] = None
+) -> str:
+    """Start a stateful conversation with a persistent system prompt.
+
+    This is the recommended way to begin a multi-turn conversation with
+    your local model. It locks in a system prompt for the entire session
+    and returns a response_id you can pass to continue_conversation for
+    all subsequent turns, with no need to re-send the system prompt or
+    manage message history manually.
+
+    Typical workflow:
+        1. Call start_conversation(system_prompt=..., first_message=...)
+        2. Note the 'response_id' in the returned JSON
+        3. Call continue_conversation(response_id=..., message=...) for
+           each subsequent turn
+
+    Args:
+        system_prompt: The persona or instructions to apply for the whole
+                       conversation (e.g. "You are a friend at a bar,
+                       keep it casual and fun")
+        first_message: The opening message to send to the model
+        temperature: Controls randomness (0.0 to 1.0, default 0.7)
+        max_tokens: Maximum tokens per response (default 2048)
+        model: Model to use. Auto-detected if omitted.
+
+    Returns:
+        JSON string with keys:
+        - response_id: pass this to continue_conversation
+        - message: the model's first response
+        - model: the model that was used
+    """
+    try:
+        # Auto-detect model if not specified
+        if model is None:
+            try:
+                current = await get_current_model()
+                detected = current.replace("Currently loaded model: ", "").strip()
+                if not detected or detected == "Unknown":
+                    raise ValueError("Could not determine current model")
+                model = detected
+            except Exception as e:
+                log_error(f"Model auto-detection failed: {str(e)}")
+                return json.dumps({
+                    "error": (
+                        "Could not detect the currently loaded model. "
+                        "Please specify a model explicitly via the 'model' parameter."
+                    )
+                })
+
+        # Build the opening payload; the system prompt is embedded as instructions
+        payload: Dict[str, Any] = {
+            "input": first_message,
+            "model": model,
+            "stream": False,
+            "instructions": system_prompt,
+        }
+
+        log_info("Starting new stateful conversation")
+
+        response = requests.post(
+            f"{LMSTUDIO_API_BASE}/responses",
+            json=payload,
+            timeout=60
+        )
+
+        if response.status_code != 200:
+            log_error(f"LM Studio API error: {response.status_code}")
+            return json.dumps({
+                "error": f"LM Studio returned status code {response.status_code}"
+            })
+
+        data = response.json()
+        log_info("Conversation started successfully")
+
+        # Extract the text content from the response
+        message_text = ""
+        output = data.get("output", [])
+        if isinstance(output, list):
+            for block in output:
+                if isinstance(block, dict) and block.get("type") == "message":
+                    for content in block.get("content", []):
+                        if isinstance(content, dict) and content.get("type") == "output_text":
+                            message_text = content.get("text", "")
+                            break
+        elif isinstance(output, str):
+            message_text = output
+
+        return json.dumps({
+            "response_id": data.get("id", ""),
+            "message": message_text or data.get("output", ""),
+            "model": data.get("model", model)
+        })
+
+    except requests.exceptions.RequestException as e:
+        log_error(f"Request error in start_conversation: {str(e)}")
+        return json.dumps({"error": f"Failed to start conversation: {str(e)}"})
+    except Exception as e:
+        log_error(f"Unexpected error in start_conversation: {str(e)}")
+        return json.dumps({"error": f"Unexpected error: {str(e)}"})
+
+
+@mcp.tool()
+async def continue_conversation(
+    response_id: str,
+    message: str,
+    temperature: float = 0.7,
+    max_tokens: int = 2048,
+    model: Optional[str] = None
+) -> str:
+    """Continue a stateful conversation started with start_conversation.
+
+    Sends the next message in a conversation, automatically chaining
+    context via the response_id. The system prompt from the original
+    start_conversation call is preserved throughout; you never need
+    to re-send it.
+
+    Args:
+        response_id: The 'response_id' returned by start_conversation
+                     or a previous continue_conversation call
+        message: Your next message in the conversation
+        temperature: Controls randomness (0.0 to 1.0, default 0.7)
+        max_tokens: Maximum tokens per response (default 2048)
+        model: Model to use. Auto-detected if omitted.
+
+    Returns:
+        JSON string with keys:
+        - response_id: pass this to the next continue_conversation call
+        - message: the model's response
+        - model: the model that was used
+    """
+    try:
+        # Auto-detect model if not specified
+        if model is None:
+            try:
+                current = await get_current_model()
+                detected = current.replace("Currently loaded model: ", "").strip()
+                if not detected or detected == "Unknown":
+                    raise ValueError("Could not determine current model")
+                model = detected
+            except Exception as e:
+                log_error(f"Model auto-detection failed: {str(e)}")
+                return json.dumps({
+                    "error": (
+                        "Could not detect the currently loaded model. "
+                        "Please specify a model explicitly via the 'model' parameter."
+                    )
+                })
+
+        payload: Dict[str, Any] = {
+            "input": message,
+            "model": model,
+            "stream": False,
+            "previous_response_id": response_id,
+        }
+
+        log_info(f"Continuing conversation (previous_response_id={response_id})")
+
+        response = requests.post(
+            f"{LMSTUDIO_API_BASE}/responses",
+            json=payload,
+            timeout=60
+        )
+
+        if response.status_code != 200:
+            log_error(f"LM Studio API error: {response.status_code}")
+            return json.dumps({
+                "error": f"LM Studio returned status code {response.status_code}"
+            })
+
+        data = response.json()
+        log_info("Received continuation response")
+
+        # Extract the text content from the response
+        message_text = ""
+        output = data.get("output", [])
+        if isinstance(output, list):
+            for block in output:
+                if isinstance(block, dict) and block.get("type") == "message":
+                    for content in block.get("content", []):
+                        if isinstance(content, dict) and content.get("type") == "output_text":
+                            message_text = content.get("text", "")
+                            break
+        elif isinstance(output, str):
+            message_text = output
+
+        return json.dumps({
+            "response_id": data.get("id", ""),
+            "message": message_text or data.get("output", ""),
+            "model": data.get("model", model)
+        })
+
+    except requests.exceptions.RequestException as e:
+        log_error(f"Request error in continue_conversation: {str(e)}")
+        return json.dumps({"error": f"Failed to continue conversation: {str(e)}"})
+    except Exception as e:
+        log_error(f"Unexpected error in continue_conversation: {str(e)}")
+        return json.dumps({"error": f"Unexpected error: {str(e)}"})
+
+
 def main():
     """Entry point for the package when installed via pip"""
     log_info("Starting LM Studio Bridge MCP Server")
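
To show how the two new tools are meant to chain together, here is a minimal usage sketch that calls the coroutines directly rather than through an MCP client. It assumes LM Studio is running locally with a model loaded; the import path lmstudio_bridge is hypothetical and should be adjusted to the actual module name.

import asyncio
import json

# Hypothetical import path; point this at the actual bridge module.
from lmstudio_bridge import start_conversation, continue_conversation

async def demo() -> None:
    # Turn 1: lock in the persona and send the opening message.
    first = json.loads(await start_conversation(
        system_prompt="You are a friend at a bar, keep it casual and fun",
        first_message="Long week. Talk me out of checking my work email."
    ))
    if "error" in first:
        raise RuntimeError(first["error"])
    print("assistant:", first["message"])

    # Turn 2 onward: pass only the latest response_id and the new message;
    # the system prompt and prior turns are carried via previous_response_id.
    second = json.loads(await continue_conversation(
        response_id=first["response_id"],
        message="Fine, you win. What should I order?"
    ))
    print("assistant:", second.get("message", second.get("error")))

if __name__ == "__main__":
    asyncio.run(demo())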