
Commit 6830b59

feat: add start_conversation tool for stateful sessions with system prompt (#9)
Adds two new tools that make multi-turn conversations with local LLMs first-class, solving the problem where Claude defaulted to stateless chat_completion calls and had to re-send the system prompt manually on every turn.

start_conversation opens a new session with a persistent system prompt and a first message, returning a response_id that locks in the persona for the entire conversation. continue_conversation sends the next message in the session by chaining previous_response_id automatically; LM Studio preserves the system prompt throughout, so no manual history management is needed.

Also bumps the default max_tokens from 1024 to 2048 in chat_completion and text_completion to prevent responses from being cut off mid-sentence.

Requires LM Studio v0.3.29+.
1 parent baa6fab commit 6830b59
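For orientation, here is a minimal sketch of the two-turn /responses flow that the new tools wrap, pieced together from the payloads in the diff below. The base URL and model name are assumptions (http://localhost:1234/v1 is LM Studio's usual default, LMSTUDIO_API_BASE in the bridge may point elsewhere, and "qwen2.5-7b-instruct" is a placeholder); the field names input, instructions, and previous_response_id, and the output parsing, mirror the committed code.

# Sketch only: raw HTTP calls equivalent to start_conversation + continue_conversation.
# Assumptions: LM Studio v0.3.29+ running at http://localhost:1234/v1 with a model loaded.
import requests

BASE = "http://localhost:1234/v1"  # assumption: LM Studio's default REST address

# Turn 1: the system prompt is sent once, as "instructions".
first = requests.post(f"{BASE}/responses", json={
    "model": "qwen2.5-7b-instruct",  # placeholder model identifier
    "input": "Rough week. Distract me.",
    "instructions": "You are a friend at a bar, keep it casual and fun",
    "stream": False,
}, timeout=60).json()

# Turn 2: chain off the previous response id; no system prompt or history is re-sent.
second = requests.post(f"{BASE}/responses", json={
    "model": "qwen2.5-7b-instruct",
    "input": "Ha. So what should I order?",
    "previous_response_id": first["id"],
    "stream": False,
}, timeout=60).json()

# The assistant text sits in output -> message -> content -> output_text,
# the same structure the new tools parse.
for block in second.get("output", []):
    if block.get("type") == "message":
        for content in block.get("content", []):
            if content.get("type") == "output_text":
                print(content.get("text", ""))

The start_conversation and continue_conversation tools wrap exactly this exchange, adding model auto-detection and error handling.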

1 file changed

Lines changed: 208 additions & 3 deletions


lmstudio_bridge.py

@@ -112,7 +112,7 @@ async def chat_completion(
     prompt: str,
     system_prompt: str = "",
     temperature: float = 0.7,
-    max_tokens: int = 1024
+    max_tokens: int = 2048
 ) -> str:
     """Generate a chat completion from the current LM Studio model.
 
@@ -171,7 +171,7 @@ async def chat_completion(
 async def text_completion(
     prompt: str,
     temperature: float = 0.7,
-    max_tokens: int = 1024,
+    max_tokens: int = 2048,
     stop_sequences: Optional[List[str]] = None
 ) -> str:
     """Generate a raw text completion (non-chat format) from LM Studio.
@@ -182,7 +182,7 @@ async def text_completion(
     Args:
         prompt: The text prompt to complete
         temperature: Controls randomness (0.0 to 2.0, default 0.7)
-        max_tokens: Maximum number of tokens to generate (default 1024)
+        max_tokens: Maximum number of tokens to generate (default 2048)
         stop_sequences: Optional list of sequences where generation will stop
 
     Returns:
@@ -379,6 +379,211 @@ async def create_response(
         return json.dumps({"error": f"Unexpected error: {str(e)}"})
 
 
+@mcp.tool()
+async def start_conversation(
+    system_prompt: str,
+    first_message: str,
+    temperature: float = 0.7,
+    max_tokens: int = 2048,
+    model: Optional[str] = None
+) -> str:
+    """Start a stateful conversation with a persistent system prompt.
+
+    This is the recommended way to begin a multi-turn conversation with
+    your local model. It locks in a system prompt for the entire session
+    and returns a response_id you can pass to continue_conversation for
+    all subsequent turns — no need to re-send the system prompt or
+    manage message history manually.
+
+    Typical workflow:
+    1. Call start_conversation(system_prompt=..., first_message=...)
+    2. Note the 'response_id' in the returned JSON
+    3. Call continue_conversation(response_id=..., message=...) for
+       each subsequent turn
+
+    Args:
+        system_prompt: The persona or instructions to apply for the whole
+                       conversation (e.g. "You are a friend at a bar,
+                       keep it casual and fun")
+        first_message: The opening message to send to the model
+        temperature: Controls randomness (0.0 to 1.0, default 0.7)
+        max_tokens: Maximum tokens per response (default 2048)
+        model: Model to use. Auto-detected if omitted.
+
+    Returns:
+        JSON string with keys:
+        - response_id: pass this to continue_conversation
+        - message: the model's first response
+        - model: the model that was used
+    """
+    try:
+        # Auto-detect model if not specified
+        if model is None:
+            try:
+                current = await get_current_model()
+                detected = current.replace("Currently loaded model: ", "").strip()
+                if not detected or detected == "Unknown":
+                    raise ValueError("Could not determine current model")
+                model = detected
+            except Exception as e:
+                log_error(f"Model auto-detection failed: {str(e)}")
+                return json.dumps({
+                    "error": (
+                        "Could not detect the currently loaded model. "
+                        "Please specify a model explicitly via the 'model' parameter."
+                    )
+                })
+
+        # Build the opening payload — system prompt embedded as instructions
+        payload: Dict[str, Any] = {
+            "input": first_message,
+            "model": model,
+            "stream": False,
+            "instructions": system_prompt,
+        }
+
+        log_info("Starting new stateful conversation")
+
+        response = requests.post(
+            f"{LMSTUDIO_API_BASE}/responses",
+            json=payload,
+            timeout=60
+        )
+
+        if response.status_code != 200:
+            log_error(f"LM Studio API error: {response.status_code}")
+            return json.dumps({
+                "error": f"LM Studio returned status code {response.status_code}"
+            })
+
+        data = response.json()
+        log_info("Conversation started successfully")
+
+        # Extract the text content from the response
+        message_text = ""
+        output = data.get("output", [])
+        if isinstance(output, list):
+            for block in output:
+                if isinstance(block, dict) and block.get("type") == "message":
+                    for content in block.get("content", []):
+                        if isinstance(content, dict) and content.get("type") == "output_text":
+                            message_text = content.get("text", "")
+                            break
+        elif isinstance(output, str):
+            message_text = output
+
+        return json.dumps({
+            "response_id": data.get("id", ""),
+            "message": message_text or data.get("output", ""),
+            "model": data.get("model", model)
+        })
+
+    except requests.exceptions.RequestException as e:
+        log_error(f"Request error in start_conversation: {str(e)}")
+        return json.dumps({"error": f"Failed to start conversation: {str(e)}"})
+    except Exception as e:
+        log_error(f"Unexpected error in start_conversation: {str(e)}")
+        return json.dumps({"error": f"Unexpected error: {str(e)}"})
+
+
+@mcp.tool()
+async def continue_conversation(
+    response_id: str,
+    message: str,
+    temperature: float = 0.7,
+    max_tokens: int = 2048,
+    model: Optional[str] = None
+) -> str:
+    """Continue a stateful conversation started with start_conversation.
+
+    Sends the next message in a conversation, automatically chaining
+    context via the response_id. The system prompt from the original
+    start_conversation call is preserved throughout — you never need
+    to re-send it.
+
+    Args:
+        response_id: The 'response_id' returned by start_conversation
+                     or a previous continue_conversation call
+        message: Your next message in the conversation
+        temperature: Controls randomness (0.0 to 1.0, default 0.7)
+        max_tokens: Maximum tokens per response (default 2048)
+        model: Model to use. Auto-detected if omitted.
+
+    Returns:
+        JSON string with keys:
+        - response_id: pass this to the next continue_conversation call
+        - message: the model's response
+        - model: the model that was used
+    """
+    try:
+        # Auto-detect model if not specified
+        if model is None:
+            try:
+                current = await get_current_model()
+                detected = current.replace("Currently loaded model: ", "").strip()
+                if not detected or detected == "Unknown":
+                    raise ValueError("Could not determine current model")
+                model = detected
+            except Exception as e:
+                log_error(f"Model auto-detection failed: {str(e)}")
+                return json.dumps({
+                    "error": (
+                        "Could not detect the currently loaded model. "
+                        "Please specify a model explicitly via the 'model' parameter."
+                    )
+                })
+
+        payload: Dict[str, Any] = {
+            "input": message,
+            "model": model,
+            "stream": False,
+            "previous_response_id": response_id,
+        }
+
+        log_info(f"Continuing conversation (previous_response_id={response_id})")
+
+        response = requests.post(
+            f"{LMSTUDIO_API_BASE}/responses",
+            json=payload,
+            timeout=60
+        )
+
+        if response.status_code != 200:
+            log_error(f"LM Studio API error: {response.status_code}")
+            return json.dumps({
+                "error": f"LM Studio returned status code {response.status_code}"
+            })
+
+        data = response.json()
+        log_info("Received continuation response")
+
+        # Extract the text content from the response
+        message_text = ""
+        output = data.get("output", [])
+        if isinstance(output, list):
+            for block in output:
+                if isinstance(block, dict) and block.get("type") == "message":
+                    for content in block.get("content", []):
+                        if isinstance(content, dict) and content.get("type") == "output_text":
+                            message_text = content.get("text", "")
+                            break
+        elif isinstance(output, str):
+            message_text = output
+
+        return json.dumps({
+            "response_id": data.get("id", ""),
+            "message": message_text or data.get("output", ""),
+            "model": data.get("model", model)
+        })
+
+    except requests.exceptions.RequestException as e:
+        log_error(f"Request error in continue_conversation: {str(e)}")
+        return json.dumps({"error": f"Failed to continue conversation: {str(e)}"})
+    except Exception as e:
+        log_error(f"Unexpected error in continue_conversation: {str(e)}")
+        return json.dumps({"error": f"Unexpected error: {str(e)}"})
+
+
 def main():
     """Entry point for the package when installed via pip"""
     log_info("Starting LM Studio Bridge MCP Server")
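A hypothetical smoke test for the new tools (not part of the commit). It assumes lmstudio_bridge.py is importable, LM Studio is running with a model loaded, and the @mcp.tool() decorator leaves the underlying coroutines directly callable; if your MCP framework wraps them, drive the tools through an MCP client instead.

# Hypothetical local check; see the assumptions above.
import asyncio
import json

from lmstudio_bridge import start_conversation, continue_conversation

async def demo():
    # Open the session: the system prompt is locked in for every later turn.
    opening = json.loads(await start_conversation(
        system_prompt="You are a friend at a bar, keep it casual and fun",
        first_message="Long week. Talk me out of checking work email."
    ))
    print(opening["message"])

    # Each follow-up needs only the last response_id and the new message.
    follow_up = json.loads(await continue_conversation(
        response_id=opening["response_id"],
        message="Fine. What should I order?"
    ))
    print(follow_up["message"])

asyncio.run(demo())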
