Skip to content

Commit 317986a

Browse files
author
Tooru
committed
Unload LM Studio models after runs
1 parent be8b59d commit 317986a

3 files changed

Lines changed: 182 additions & 0 deletions

File tree

harness/expert_questions/run_benchmark.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ def get_logger(name=None):
5454
expand_models_with_thinking_variants,
5555
fetch_model_pricing,
5656
store_text,
57+
unload_lmstudio_models,
5758
)
5859
from harness.expert_questions.dataset import Question, load_questions
5960

@@ -1040,6 +1041,9 @@ def retry_qa_api_error_attempts(
10401041
status_counts=status_counts,
10411042
)
10421043

1044+
if any(_is_lmstudio_model(model) for model in models):
1045+
unload_lmstudio_models()
1046+
10431047
return summary
10441048

10451049

@@ -1331,6 +1335,10 @@ def _attempt_key(a: dict) -> tuple:
13311335
accuracy=overall_accuracy,
13321336
)
13331337

1338+
models_in_summary = [m for m in original_summary.get("models", []) if isinstance(m, str)]
1339+
if any(_is_lmstudio_model(model) for model in models_in_summary):
1340+
unload_lmstudio_models()
1341+
13341342
return original_summary
13351343

13361344

@@ -1844,6 +1852,9 @@ def _bucket_level_for_metrics(attempt: dict[str, Any], default_level: str | None
18441852
latest_summary = QA_RUNS_ROOT / "latest_summary.json"
18451853
store_text(latest_summary, json.dumps(summary, indent=2))
18461854

1855+
if any(_is_lmstudio_model(model) for model in requested_models):
1856+
unload_lmstudio_models()
1857+
18471858
return summary
18481859

18491860

harness/run_harness.py

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
import textwrap
1919
import time
2020
import uuid
21+
from urllib.parse import urlparse
2122
from collections import Counter, defaultdict
2223
from pathlib import Path
2324
from typing import Any
@@ -330,6 +331,74 @@ def _normalize_lmstudio_model_id(model: str) -> str:
330331
return model
331332

332333

334+
def _resolve_lms_path() -> str | None:
335+
resolved = shutil.which("lms")
336+
if resolved:
337+
return resolved
338+
fallback = Path.home() / ".lmstudio" / "bin" / "lms"
339+
if fallback.exists():
340+
return str(fallback)
341+
return None
342+
343+
344+
def _lmstudio_cli_instance_args(base_url: str) -> list[str]:
    """Translate an LM Studio base URL into ``--host``/``--port`` CLI arguments.

    Args:
        base_url: Server address such as ``http://127.0.0.1:1234/v1`` or a
            bare ``host:port``. A value without a scheme is parsed as if it
            were prefixed with ``http://``.

    Returns:
        ``["--host", host, "--port", port]``. An empty list is returned when
        ``base_url`` is empty/blank or cannot be parsed at all. A missing
        host falls back to ``127.0.0.1``; a missing, zero, or invalid port
        falls back to ``1234``.
    """
    trimmed = (base_url or "").strip()
    if not trimmed:
        return []
    if "://" not in trimmed:
        trimmed = f"http://{trimmed}"

    try:
        parsed = urlparse(trimmed)
    except ValueError:
        # urlparse rejects e.g. unbalanced IPv6 brackets; emit no instance
        # arguments rather than raising out of a best-effort helper.
        return []

    host = parsed.hostname or "127.0.0.1"
    try:
        # .port raises ValueError for non-numeric or out-of-range ports.
        port = parsed.port or 1234
    except ValueError:
        port = 1234
    return ["--host", host, "--port", str(port)]
354+
355+
356+
def _truncate_cli_output(value: str, *, limit: int = 2000) -> str:
357+
cleaned = (value or "").strip()
358+
if len(cleaned) > limit:
359+
return f"{cleaned[:limit]}..."
360+
return cleaned
361+
362+
363+
def unload_lmstudio_models(*, base_url: str | None = None, timeout: int = 30) -> bool:
    """Best-effort cleanup of loaded LM Studio models.

    Runs ``lms unload --all`` against the instance derived from the base URL.
    Every expected failure is logged as a warning and reported through the
    return value instead of being raised.

    Args:
        base_url: LM Studio server URL. When omitted or empty,
            ``SETTINGS.lmstudio_base_url`` is used.
        timeout: Seconds to wait for the CLI before giving up.

    Returns:
        True when the unload command exits with status 0; otherwise logs a
        warning and returns False.
    """
    resolved_base_url = (base_url or SETTINGS.lmstudio_base_url or "").strip()
    if not resolved_base_url:
        logger.warning("LM Studio base URL is not configured; skipping model unload")
        return False

    lms_path = _resolve_lms_path()
    if not lms_path:
        logger.warning("LM Studio CLI 'lms' not found; skipping model unload")
        return False

    try:
        instance_args = _lmstudio_cli_instance_args(resolved_base_url)
    except ValueError as exc:
        # URL parsing can reject a malformed host/port; a bad setting must
        # not turn post-run cleanup into a crash.
        logger.warning("Invalid LM Studio base URL; skipping model unload: %s", exc)
        return False

    try:
        unload = subprocess.run(
            [lms_path, "unload", "--all", *instance_args],
            capture_output=True,
            text=True,
            # Undecodable bytes in CLI output should not raise; the output
            # is only used for a log message.
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        logger.warning("Timed out unloading LM Studio models")
        return False
    except OSError as exc:
        logger.warning("Unable to unload LM Studio models: %s", exc)
        return False

    if unload.returncode != 0:
        detail = _truncate_cli_output(unload.stderr or unload.stdout)
        logger.warning("Unable to unload LM Studio models: %s", detail or "unknown error")
        return False

    return True
400+
401+
333402
def _store_model_metadata(
334403
registry: dict[str, dict[str, Any]],
335404
model_id: str,
@@ -2262,6 +2331,9 @@ def retry_api_error_attempts(
22622331
logger.info("Retried: %d attempts", len(retried_attempts))
22632332
logger.info("Status breakdown: %s", dict(status_counts))
22642333

2334+
if any(_is_lmstudio_model(model) for model in models):
2335+
unload_lmstudio_models()
2336+
22652337
return summary
22662338

22672339

@@ -2402,6 +2474,9 @@ def retry_failed_attempts(
24022474
logger.info("Retried: %d attempts", len(retried_attempts))
24032475
logger.info("Status breakdown: %s", dict(status_counts))
24042476

2477+
if any(_is_lmstudio_model(model) for model in models):
2478+
unload_lmstudio_models()
2479+
24052480
return summary
24062481

24072482

@@ -2642,6 +2717,9 @@ def resume_incomplete_run(
26422717
logger.info("Total: %d attempts", len(all_attempts))
26432718
logger.info("Status breakdown: %s", dict(status_counts))
26442719

2720+
if any(_is_lmstudio_model(model) for model in all_models):
2721+
unload_lmstudio_models()
2722+
26452723
return summary
26462724

26472725

@@ -2876,6 +2954,9 @@ def _suggest_levels_for_model(model_id: str) -> list[str]:
28762954
latest_summary_path = RUN_ARTIFACTS / "latest_summary.json"
28772955
store_text(latest_summary_path, json.dumps(summary, indent=2))
28782956

2957+
if any(_is_lmstudio_model(model) for model in original_models):
2958+
unload_lmstudio_models()
2959+
28792960
return summary
28802961

28812962

tests/test_external_endpoint_config.py

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -430,6 +430,96 @@ def test_switch_lmstudio_model_rejects_invalid_model_id(monkeypatch) -> None:
430430
assert "Invalid" in response.json()["detail"]
431431

432432

433+
def test_run_tasks_unloads_lmstudio_models_after_completion(monkeypatch, tmp_path) -> None:
    """``run_tasks`` calls the LM Studio unload hook exactly once for an LM Studio model."""
    from harness import run_harness

    # One entry is recorded per unload invocation.
    unload_calls: list[tuple[str | None, int]] = []

    def fake_unload(*, base_url: str | None = None, timeout: int = 30) -> bool:
        assert base_url is None or isinstance(base_url, str)
        assert timeout > 0
        unload_calls.append((base_url, timeout))
        return True

    def fake_evaluate_attempt(*args: Any, **kwargs: Any) -> dict:
        # Stubbed attempt result that echoes identifying fields from kwargs.
        assert args is not None
        attempt: dict = {
            "task_id": kwargs["task_id"],
            "model": kwargs["model"],
            "provider": kwargs["preferred_provider"],
            "sample_index": kwargs["sample_index"],
        }
        attempt["status"] = "passed"
        attempt["duration_seconds"] = 0.0
        attempt["api_latency_seconds"] = 0.0
        attempt["usage"] = {"prompt_tokens": 1, "completion_tokens": 1}
        attempt["attempt_dir"] = "stub"
        return attempt

    def fake_compute_metrics(*args: Any, **kwargs: Any) -> dict:  # noqa: ARG001
        assert args is not None
        assert kwargs is not None
        return {}

    # Install every stub before the harness is exercised.
    monkeypatch.setattr(run_harness, "unload_lmstudio_models", fake_unload)
    monkeypatch.setattr(run_harness, "evaluate_attempt", fake_evaluate_attempt)
    monkeypatch.setattr(run_harness, "compute_metrics", fake_compute_metrics)
    monkeypatch.setattr(run_harness, "compute_metrics_by_thinking_level", fake_compute_metrics)

    expected_run_id = "run_test_lmstudio_unload"
    summary = run_harness.run_tasks(
        tasks=["python_bugfix_prime_checker"],
        models=["lmstudio/test-model"],
        samples=1,
        temperature=0.0,
        max_tokens=16,
        output_dir=tmp_path,
        run_id=expected_run_id,
    )

    assert summary["run_id"] == expected_run_id
    assert len(unload_calls) == 1
482+
483+
484+
def test_run_question_benchmark_unloads_lmstudio_models_after_completion(monkeypatch, tmp_path) -> None:
    """``run_question_benchmark`` calls the LM Studio unload hook exactly once."""
    from harness.expert_questions import run_benchmark

    # One entry is recorded per unload invocation.
    unload_calls: list[tuple[str | None, int]] = []

    def fake_unload(*, base_url: str | None = None, timeout: int = 30) -> bool:
        assert base_url is None or isinstance(base_url, str)
        assert timeout > 0
        unload_calls.append((base_url, timeout))
        return True

    def fake_call_completion(*args: Any, **kwargs: Any) -> tuple[str, dict[str, Any], float]:
        # Canned completion: text, raw response with usage, latency.
        assert args is not None
        assert kwargs is not None
        response: dict[str, Any] = {"usage": {"prompt_tokens": 1, "completion_tokens": 1}}
        return ("stub", response, 0.0)

    # Install every patch before the benchmark is exercised.
    monkeypatch.setattr(run_benchmark, "JUDGE_MODEL", None)
    monkeypatch.setattr(run_benchmark, "unload_lmstudio_models", fake_unload)
    monkeypatch.setattr(run_benchmark, "_call_completion", fake_call_completion)

    expected_run_id = "qa_test_lmstudio_unload"
    summary = run_benchmark.run_question_benchmark(
        models=["lmstudio/test-model"],
        samples=1,
        temperature=0.0,
        max_tokens=16,
        run_id=expected_run_id,
        output_dir=tmp_path,
        question_limit=1,
    )

    assert summary["run_id"] == expected_run_id
    assert len(unload_calls) == 1
521+
522+
433523
# =============================================================================
434524
# Expert Questions LM Studio URL Configuration Tests
435525
# =============================================================================

0 commit comments

Comments
 (0)