Skip to content

Commit bce93bd

Browse files
tbitcs and oz-agent authored
fix(agent/run): silent no-response + provider visibility (#187)
* fix(agent/run): silent no-response + provider visibility — closes #186-followup Root causes of specsmith run returning nothing: 1. DEFAULT_OLLAMA_MODEL=qwen2.5:7b not installed; Ollama 404 silently swallowed 2. run_chat returning None gave user zero feedback 3. EventEmitter always wrote JSONL even in interactive terminal mode Fixes: - chat_runner: _pick_ollama_model() queries /api/tags and selects the first installed model from a preference list (lighter models first); falls back to DEFAULT_OLLAMA_MODEL only when the API is unreachable or list is empty. SPECSMITH_OLLAMA_MODEL env var still wins unconditionally. - runner: _handle_command() prints an actionable hint when run_chat returns None — explains whether Ollama is running (with model name) or no provider is available at all. - events: PlainTextEmitter subclass — token() writes raw text to stdout, emit() is a no-op. Used by AgentRunner when json_events=False so LLM responses render as readable prose instead of JSONL blobs. - runner: check_providers() probes all four providers (Ollama, Anthropic, OpenAI, Gemini) and returns ProviderStatus(name, available, model, note). - runner: _print_banner() shows a provider status table in interactive mode so the user knows upfront which model will respond before typing. - cli: specsmith run --check validates providers and exits 0/1 without starting the REPL. - run_interactive: adds a trailing newline after each streamed response so the next prompt doesn't bleed onto the last output line. 19 new tests in test_agent_run_feedback.py (854 total, 0 failures). Co-Authored-By: Oz <oz-agent@warp.dev> * style: ruff format cli.py Co-Authored-By: Oz <oz-agent@warp.dev> * style: fix import sort in test_agent_run_feedback.py Co-Authored-By: Oz <oz-agent@warp.dev> --------- Co-authored-by: Oz <oz-agent@warp.dev>
1 parent 205c077 commit bce93bd

5 files changed

Lines changed: 694 additions & 31 deletions

File tree

src/specsmith/agent/chat_runner.py

Lines changed: 54 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,24 @@
3434

3535
# Where the local Ollama daemon is expected when OLLAMA_HOST is not set.
DEFAULT_OLLAMA_HOST = "http://127.0.0.1:11434"
# Evaluated once at import time; SPECSMITH_OLLAMA_MODEL replaces the built-in
# default when it is present in the environment.
DEFAULT_OLLAMA_MODEL = os.environ.get("SPECSMITH_OLLAMA_MODEL", "qwen2.5:7b")

# Candidates that _pick_ollama_model() tries in order against the models the
# daemon reports as installed. Smaller / quicker models come first so the
# REPL stays responsive on developer hardware.
_OLLAMA_MODEL_PREFERENCE = [
    # 7-8B class
    "qwen2.5:7b",
    "qwen2.5-coder:7b-instruct",
    "qwen3:8b",
    "mistral:7b-instruct-q4_0",
    "llama3:8b-instruct-q4_K_M",
    # 12-14B class
    "qwen2.5:14b",
    "qwen2.5-coder:14b",
    "qwen3:14b",
    "mistral-nemo:12b",
    "phi4:14b-q4_K_M",
    "deepseek-r1:14b",
    # 30B class
    "qwen3:30b-a3b",
]
3755
SYSTEM_PROMPT = (
3856
"You are Nexus, the local-first agentic developer assistant inside "
3957
"Specsmith. Always end your response with the canonical contract:\n"
@@ -239,19 +257,54 @@ def run_single_prompt(prompt: str, *, max_tokens: int = 500) -> str | None: # n
239257
# ---------------------------------------------------------------------------
240258

241259

260+
def _ollama_alive(host: str) -> bool:
    """Return True when an Ollama daemon answers ``GET {host}/api/tags``.

    This is a liveness probe, so it never raises: any failure to reach, or
    even to address, the daemon is reported as ``False``.

    Args:
        host: Base URL of the daemon, without a trailing slash.

    Returns:
        ``True`` if the request completed within the 2 second timeout,
        ``False`` otherwise.
    """
    try:
        with urlopen(f"{host}/api/tags", timeout=2):  # noqa: S310
            return True
    except (URLError, TimeoutError, OSError, ValueError):
        # URLError / OSError / TimeoutError: daemon down, refused, or slow.
        # ValueError: urllib rejects a URL that has no scheme, which happens
        # when OLLAMA_HOST is empty or a bare address such as "0.0.0.0".
        # Treat that as "not reachable" instead of crashing the caller.
        return False
266+
267+
268+
def _looks_like_embedding_model(entry: dict) -> bool:
    """Heuristically flag an ``/api/tags`` entry as an embedding-only model."""
    # NOTE(review): heuristic only. Embedding models published for Ollama
    # typically carry "embed" in the name or report a BERT-style family in
    # ``details.family``; confirm against the Ollama API if this misfires.
    details = entry.get("details")
    family = details.get("family") if isinstance(details, dict) else None
    return "embed" in entry["name"].lower() or "bert" in str(family or "").lower()


def _pick_ollama_model(host: str) -> str:
    """Return the Ollama model name to use for chat on this machine.

    Resolution order:

    1. ``SPECSMITH_OLLAMA_MODEL`` (read at call time, whitespace-stripped)
       always wins; no network request is made.
    2. The first entry of ``_OLLAMA_MODEL_PREFERENCE`` that
       ``GET {host}/api/tags`` reports as installed.
    3. Otherwise the alphabetically first installed model that does not look
       like an embedding-only model. If every installed model looks like
       one, the alphabetically first installed model is used.
    4. ``DEFAULT_OLLAMA_MODEL`` when the API is unreachable, the response is
       malformed, or no model is installed.

    Args:
        host: Base URL of the Ollama daemon, without a trailing slash.

    Returns:
        The model name to put in the chat request. This function does not
        raise for network or parsing problems.
    """
    env_override = os.environ.get("SPECSMITH_OLLAMA_MODEL", "").strip()
    if env_override:
        return env_override
    try:
        with urlopen(f"{host}/api/tags", timeout=2) as resp:  # noqa: S310
            data = json.loads(resp.read())
        # Skip malformed entries rather than abandoning the whole scan.
        entries = [
            m for m in data.get("models", []) if isinstance(m, dict) and m.get("name")
        ]
        installed = {m["name"] for m in entries}
        for candidate in _OLLAMA_MODEL_PREFERENCE:
            if candidate in installed:
                return candidate
        # None of the preferred models is installed. Picking the plain
        # alphabetical first would often select an embedding model (their
        # names tend to sort early), which cannot answer a chat request.
        chat_capable = [
            m["name"] for m in entries if not _looks_like_embedding_model(m)
        ]
        if chat_capable:
            return min(chat_capable)
        if installed:
            return min(installed)
    except Exception:  # noqa: BLE001 — best-effort; fall through to default
        pass
    return DEFAULT_OLLAMA_MODEL
292+
293+
242294
def _run_ollama(
243295
messages: list[dict[str, str]],
244296
emitter: EventEmitter,
245297
block_id: str,
246298
) -> tuple[str | None, _UsageDelta]:
247299
"""Stream from a local Ollama daemon using only stdlib."""
248300
host = os.environ.get("OLLAMA_HOST", DEFAULT_OLLAMA_HOST).rstrip("/")
249-
model = os.environ.get("SPECSMITH_OLLAMA_MODEL", DEFAULT_OLLAMA_MODEL)
250301
usage = _UsageDelta()
251302

252303
if not _ollama_alive(host):
253304
return None, usage
254305

306+
model = _pick_ollama_model(host)
307+
255308
payload = json.dumps({"model": model, "messages": messages, "stream": True}).encode("utf-8")
256309
req = Request( # noqa: S310 - URL is a hardcoded localhost default
257310
f"{host}/api/chat",
@@ -284,14 +337,6 @@ def _run_ollama(
284337
return ("".join(pieces) if pieces else None), usage
285338

286339

287-
def _ollama_alive(host: str) -> bool:
288-
try:
289-
with urlopen(f"{host}/api/tags", timeout=2): # noqa: S310
290-
return True
291-
except (URLError, TimeoutError, OSError):
292-
return False
293-
294-
295340
def _run_anthropic(
296341
messages: list[dict[str, str]],
297342
emitter: EventEmitter,

src/specsmith/agent/events.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -231,4 +231,24 @@ def task_complete(
231231
)
232232

233233

234-
__all__ = ["EventEmitter"]
234+
class PlainTextEmitter(EventEmitter):
    """EventEmitter flavour for people reading a terminal, not machines.

    Token chunks from the LLM go to the stream verbatim, with no JSON
    envelope, so ``specsmith run`` without ``--json-events`` prints
    readable prose. Every other protocol event is discarded. System
    messages are expected to be surfaced elsewhere, by
    ``AgentRunner._default_emit_event``.
    """

    def emit(self, event: dict[str, Any]) -> None:
        """Discard a JSONL protocol frame.

        Only ``token()`` produces output in plain-text mode, so there is
        intentionally nothing to do here.
        """

    def token(self, block_id: str, text: str) -> None:  # noqa: ARG002
        """Write one raw chunk of LLM text to the stream and flush it.

        ``block_id`` is accepted for interface compatibility and ignored.
        """
        sink = self.stream
        sink.write(text)
        # A failing flush must not interrupt streaming.
        with contextlib.suppress(Exception):
            sink.flush()


__all__ = ["EventEmitter", "PlainTextEmitter"]

0 commit comments

Comments
 (0)