Skip to content

Commit 9bd352c

Browse files
tbitcsoz-agent
andcommitted
feat: specsmith serve — persistent HTTP server for agent sessions
New CLI command: specsmith serve --port 8421 --project-dir . Zero-dependency HTTP server (stdlib http.server + ThreadingMixIn) with: - POST /api/send — send messages - GET /api/events — SSE event stream (same JSONL protocol) - GET /api/status — session state - POST /api/stop — interrupt current turn - GET /api/health — liveness probe Eliminates Python startup + import time (~2s) and Ollama cold-load (~5-30s) by keeping the process and model warm indefinitely. The VS Code extension auto-detects a running serve instance and connects via HTTP instead of spawning a child process. Co-Authored-By: Oz <oz-agent@warp.dev>
1 parent c16c47a commit 9bd352c

2 files changed

Lines changed: 328 additions & 0 deletions

File tree

src/specsmith/cli.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2064,6 +2064,44 @@ def run_cmd(
20642064
raise SystemExit(1) from None
20652065

20662066

2067+
@main.command(name="serve")
2068+
@click.option(
2069+
"--project-dir",
2070+
type=click.Path(exists=True),
2071+
default=".",
2072+
help="Project root directory.",
2073+
)
2074+
@click.option("--provider", default="ollama", help="LLM provider.")
2075+
@click.option("--model", default="", help="Model name (blank = provider default).")
2076+
@click.option("--port", type=int, default=8421, help="HTTP port to listen on.")
2077+
@click.option("--host", default="127.0.0.1", help="Bind address (use 0.0.0.0 for network access).")
2078+
def serve_cmd(
2079+
project_dir: str,
2080+
provider: str,
2081+
model: str,
2082+
port: int,
2083+
host: str,
2084+
) -> None:
2085+
"""Start a persistent HTTP server for agent sessions.
2086+
2087+
Faster than `specsmith run` — keeps the Python process and Ollama
2088+
model warm between turns. Connect via SSE (GET /api/events) and
2089+
POST /api/send.
2090+
2091+
Example:
2092+
specsmith serve --port 8421 --provider ollama --model qwen2.5:14b
2093+
"""
2094+
from specsmith.serve import run_server
2095+
2096+
run_server(
2097+
project_dir=project_dir,
2098+
provider=provider,
2099+
model=model,
2100+
port=port,
2101+
host=host,
2102+
)
2103+
2104+
20672105
@main.group(name="agent")
20682106
def agent_group() -> None:
20692107
"""Manage the specsmith agentic client configuration."""

src/specsmith/serve.py

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
# SPDX-License-Identifier: MIT
2+
# Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
3+
"""specsmith serve — persistent HTTP server for agent sessions.
4+
5+
Replaces the stdio-based ``specsmith run --json-events`` with an HTTP
6+
server that can be connected to by multiple clients (VS Code, browser,
7+
scripts) without restarting the Python process or cold-loading the
8+
Ollama model.
9+
10+
Endpoints:
11+
POST /api/send — send a user message (body: {"text": "..."})
12+
GET /api/events — SSE stream of JSONL events
13+
GET /api/status — session state JSON
14+
POST /api/stop — stop the current turn
15+
GET /api/health — liveness probe
16+
17+
Launch:
18+
specsmith serve --port 8421 --project-dir .
19+
"""
20+
21+
from __future__ import annotations
22+
23+
import contextlib
24+
import json
25+
import queue
26+
import sys
27+
import threading
28+
from http.server import BaseHTTPRequestHandler, HTTPServer
29+
from pathlib import Path
30+
from socketserver import ThreadingMixIn
31+
from typing import Any
32+
33+
34+
class _ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
35+
"""Non-blocking HTTP server (one thread per request)."""
36+
37+
daemon_threads = True
38+
allow_reuse_address = True
39+
40+
41+
class _EventBus:
42+
"""Thread-safe event bus — the agent thread pushes events, SSE
43+
clients consume them. Supports multiple concurrent SSE listeners."""
44+
45+
def __init__(self) -> None:
46+
self._listeners: list[queue.Queue[dict[str, Any] | None]] = []
47+
self._lock = threading.Lock()
48+
49+
def subscribe(self) -> queue.Queue[dict[str, Any] | None]:
50+
q: queue.Queue[dict[str, Any] | None] = queue.Queue(maxsize=512)
51+
with self._lock:
52+
self._listeners.append(q)
53+
return q
54+
55+
def unsubscribe(self, q: queue.Queue[dict[str, Any] | None]) -> None:
56+
with self._lock, contextlib.suppress(ValueError):
57+
self._listeners.remove(q)
58+
59+
def emit(self, event: dict[str, Any]) -> None:
60+
with self._lock:
61+
for q in self._listeners:
62+
with contextlib.suppress(queue.Full):
63+
q.put_nowait(event)
64+
65+
66+
class _AgentThread:
67+
"""Wraps AgentRunner in a background thread with message passing."""
68+
69+
def __init__(
70+
self,
71+
project_dir: str,
72+
provider: str,
73+
model: str,
74+
bus: _EventBus,
75+
) -> None:
76+
self._project_dir = project_dir
77+
self._provider = provider
78+
self._model = model
79+
self._bus = bus
80+
self._inbox: queue.Queue[str | None] = queue.Queue()
81+
self._runner: Any = None
82+
self._thread: threading.Thread | None = None
83+
self._started = False
84+
85+
def start(self) -> None:
86+
self._thread = threading.Thread(target=self._run, daemon=True, name="agent")
87+
self._thread.start()
88+
89+
def send(self, text: str) -> None:
90+
self._inbox.put(text)
91+
92+
def stop_turn(self) -> None:
93+
"""Interrupt the current agent turn (best-effort)."""
94+
# The runner checks a flag between tool calls
95+
if self._runner:
96+
self._runner._hard_stop = True # noqa: SLF001
97+
98+
def status(self) -> dict[str, Any]:
99+
if not self._runner:
100+
return {"status": "starting"}
101+
st = self._runner._state # noqa: SLF001
102+
return {
103+
"status": "running" if self._started else "starting",
104+
"provider": st.provider_name,
105+
"model": st.model_name,
106+
"tokens": st.session_tokens,
107+
"cost_usd": st.total_cost_usd,
108+
"tool_calls": st.tool_calls_made,
109+
"elapsed_min": round(st.elapsed_minutes, 1),
110+
}
111+
112+
def _run(self) -> None:
113+
"""Agent loop — runs in a background thread."""
114+
try:
115+
from specsmith.agent.runner import AgentRunner
116+
117+
self._runner = AgentRunner(
118+
project_dir=self._project_dir,
119+
provider_name=self._provider,
120+
model=self._model,
121+
json_events=True,
122+
)
123+
124+
# Monkey-patch _emit_event to route through the bus
125+
126+
def _bus_emit(**kwargs: Any) -> None:
127+
self._bus.emit(kwargs)
128+
129+
self._runner._emit_event = _bus_emit # noqa: SLF001
130+
131+
# Print banner (emits 'ready' event)
132+
self._runner._print_banner() # noqa: SLF001
133+
self._started = True
134+
135+
# Main loop: read from inbox, dispatch to runner
136+
while True:
137+
text = self._inbox.get()
138+
if text is None:
139+
break # shutdown signal
140+
try:
141+
self._runner._handle_command(text) # noqa: SLF001
142+
except Exception as e: # noqa: BLE001
143+
self._bus.emit({"type": "error", "message": str(e)})
144+
self._bus.emit({"type": "turn_done"})
145+
except Exception as e: # noqa: BLE001
146+
self._bus.emit({"type": "error", "message": f"Agent crashed: {e}"})
147+
finally:
148+
self._bus.emit({"type": "system", "message": "Agent thread ended."})
149+
150+
151+
class _Handler(BaseHTTPRequestHandler):
152+
"""HTTP request handler for the serve endpoints."""
153+
154+
bus: _EventBus
155+
agent: _AgentThread
156+
157+
def log_message(self, format: str, *args: Any) -> None: # noqa: A002
158+
"""Suppress default stderr logging."""
159+
160+
def do_GET(self) -> None: # noqa: N802
161+
if self.path == "/api/events":
162+
self._sse()
163+
elif self.path == "/api/status":
164+
self._json_response(self.agent.status())
165+
elif self.path == "/api/health":
166+
self._json_response({"ok": True})
167+
else:
168+
self.send_error(404)
169+
170+
def do_POST(self) -> None: # noqa: N802
171+
if self.path == "/api/send":
172+
body = self._read_json()
173+
text = body.get("text", "").strip() if body else ""
174+
if not text:
175+
self._json_response({"error": "missing text"}, code=400)
176+
return
177+
self.agent.send(text)
178+
self._json_response({"ok": True})
179+
elif self.path == "/api/stop":
180+
self.agent.stop_turn()
181+
self._json_response({"ok": True})
182+
else:
183+
self.send_error(404)
184+
185+
def do_DELETE(self) -> None: # noqa: N802
186+
if self.path == "/api/session":
187+
self.agent.send(None) # type: ignore[arg-type]
188+
self._json_response({"ok": True, "message": "session ending"})
189+
else:
190+
self.send_error(404)
191+
192+
# ── SSE ────────────────────────────────────────────────────────
193+
194+
def _sse(self) -> None:
195+
self.send_response(200)
196+
self.send_header("Content-Type", "text/event-stream")
197+
self.send_header("Cache-Control", "no-cache")
198+
self.send_header("Connection", "keep-alive")
199+
self.send_header("Access-Control-Allow-Origin", "*")
200+
self.end_headers()
201+
202+
q = self.bus.subscribe()
203+
try:
204+
while True:
205+
try:
206+
event = q.get(timeout=30)
207+
except queue.Empty:
208+
# Send SSE comment as keepalive
209+
self.wfile.write(b": keepalive\n\n")
210+
self.wfile.flush()
211+
continue
212+
if event is None:
213+
break
214+
data = json.dumps(event, ensure_ascii=False)
215+
self.wfile.write(f"data: {data}\n\n".encode())
216+
self.wfile.flush()
217+
except (BrokenPipeError, ConnectionResetError, OSError):
218+
pass # client disconnected
219+
finally:
220+
self.bus.unsubscribe(q)
221+
222+
# ── Helpers ────────────────────────────────────────────────────
223+
224+
def _read_json(self) -> dict[str, Any] | None:
225+
length = int(self.headers.get("Content-Length", 0))
226+
if length == 0:
227+
return None
228+
raw = self.rfile.read(length)
229+
try:
230+
return json.loads(raw)
231+
except json.JSONDecodeError:
232+
return None
233+
234+
def _json_response(
235+
self,
236+
data: dict[str, Any],
237+
code: int = 200,
238+
) -> None:
239+
body = json.dumps(data, ensure_ascii=False).encode()
240+
self.send_response(code)
241+
self.send_header("Content-Type", "application/json")
242+
self.send_header("Content-Length", str(len(body)))
243+
self.send_header("Access-Control-Allow-Origin", "*")
244+
self.end_headers()
245+
self.wfile.write(body)
246+
247+
248+
def run_server(
249+
*,
250+
project_dir: str = ".",
251+
provider: str = "ollama",
252+
model: str = "",
253+
port: int = 8421,
254+
host: str = "127.0.0.1",
255+
) -> None:
256+
"""Start the specsmith HTTP server."""
257+
project_dir = str(Path(project_dir).resolve())
258+
bus = _EventBus()
259+
agent = _AgentThread(project_dir, provider, model, bus)
260+
261+
# Subclass to carry shared state into the handler
262+
class Handler(_Handler):
263+
pass
264+
265+
Handler.bus = bus # type: ignore[assignment]
266+
Handler.agent = agent # type: ignore[assignment]
267+
268+
server = _ThreadedHTTPServer((host, port), Handler)
269+
agent.start()
270+
271+
print( # noqa: T201
272+
f"specsmith serve — http://{host}:{port}\n"
273+
f" Project: {project_dir}\n"
274+
f" Provider: {provider}/{model or '(default)'}\n"
275+
f" Endpoints:\n"
276+
f" GET /api/events — SSE event stream\n"
277+
f" POST /api/send — send a message\n"
278+
f" GET /api/status — session status\n"
279+
f" POST /api/stop — stop current turn\n"
280+
f" Press Ctrl+C to stop.\n",
281+
file=sys.stderr,
282+
)
283+
284+
try:
285+
server.serve_forever()
286+
except KeyboardInterrupt:
287+
pass
288+
finally:
289+
agent.send(None) # type: ignore[arg-type]
290+
server.shutdown()

0 commit comments

Comments
 (0)