|
1 | | -use anyhow::Result; |
2 | | -use reqwest::Client; |
3 | | -use serde_json::Value; |
4 | | - |
5 | | -pub(in super::super) async fn test_model_inference( |
6 | | - client: &Client, |
7 | | - base_url: &str, |
8 | | - model_name: &str, |
9 | | - endpoint_type: &str, |
10 | | -) -> Result<String> { |
11 | | - let system_msg = "You are a code reviewer. Respond with a single JSON object."; |
12 | | - let user_msg = |
13 | | - "Review this code change:\n+fn add(a: i32, b: i32) -> i32 { a + b }\nRespond with: {\"ok\": true}"; |
14 | | - |
15 | | - let messages = serde_json::json!([ |
16 | | - {"role": "system", "content": system_msg}, |
17 | | - {"role": "user", "content": user_msg} |
18 | | - ]); |
19 | | - |
20 | | - if endpoint_type == "ollama" { |
21 | | - let url = format!("{}/api/chat", base_url); |
22 | | - let body = serde_json::json!({ |
23 | | - "model": model_name, |
24 | | - "messages": messages, |
25 | | - "stream": false, |
26 | | - "options": {"num_predict": 50} |
27 | | - }); |
28 | | - |
29 | | - let resp = client |
30 | | - .post(&url) |
31 | | - .json(&body) |
32 | | - .send() |
33 | | - .await |
34 | | - .map_err(|e| anyhow::anyhow!("Request failed: {}", e))?; |
35 | | - |
36 | | - if !resp.status().is_success() { |
37 | | - let status = resp.status(); |
38 | | - let body = resp.text().await.unwrap_or_default(); |
39 | | - anyhow::bail!("HTTP {} - {}", status, body); |
40 | | - } |
41 | | - |
42 | | - let text = resp.text().await?; |
43 | | - parse_ollama_response_content(&text) |
44 | | - } else { |
45 | | - let url = format!("{}/v1/chat/completions", base_url); |
46 | | - let body = serde_json::json!({ |
47 | | - "model": model_name, |
48 | | - "messages": messages, |
49 | | - "max_tokens": 50, |
50 | | - "temperature": 0.1 |
51 | | - }); |
52 | | - |
53 | | - let resp = client |
54 | | - .post(&url) |
55 | | - .json(&body) |
56 | | - .send() |
57 | | - .await |
58 | | - .map_err(|e| anyhow::anyhow!("Request failed: {}", e))?; |
59 | | - |
60 | | - if !resp.status().is_success() { |
61 | | - let status = resp.status(); |
62 | | - let body = resp.text().await.unwrap_or_default(); |
63 | | - anyhow::bail!("HTTP {} - {}", status, body); |
64 | | - } |
65 | | - |
66 | | - let text = resp.text().await?; |
67 | | - parse_openai_response_content(&text) |
68 | | - } |
69 | | -} |
70 | | - |
71 | | -pub(in super::super) fn estimate_tokens(text: &str) -> usize { |
72 | | - (text.len() / 4).max(1) |
73 | | -} |
74 | | - |
75 | | -fn parse_ollama_response_content(text: &str) -> Result<String> { |
76 | | - let value: Value = serde_json::from_str(text)?; |
77 | | - Ok(value |
78 | | - .get("message") |
79 | | - .and_then(|message| message.get("content")) |
80 | | - .and_then(|content| content.as_str()) |
81 | | - .unwrap_or("") |
82 | | - .to_string()) |
83 | | -} |
84 | | - |
85 | | -fn parse_openai_response_content(text: &str) -> Result<String> { |
86 | | - let value: Value = serde_json::from_str(text)?; |
87 | | - Ok(value |
88 | | - .get("choices") |
89 | | - .and_then(|choices| choices.as_array()) |
90 | | - .and_then(|choices| choices.first()) |
91 | | - .and_then(|choice| choice.get("message")) |
92 | | - .and_then(|message| message.get("content")) |
93 | | - .and_then(|content| content.as_str()) |
94 | | - .unwrap_or("") |
95 | | - .to_string()) |
96 | | -} |
97 | | - |
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_estimate_tokens() {
        // One token minimum, then roughly one per four bytes.
        let cases = [("", 1), ("abcd", 1), ("abcdefgh", 2), ("a]", 1)];
        for (input, expected) in cases {
            assert_eq!(estimate_tokens(input), expected);
        }
    }

    #[test]
    fn test_estimate_tokens_longer_text() {
        let text = "This is a longer response with several words in it for testing.";
        let tokens = estimate_tokens(text);
        // Sanity bounds rather than an exact count.
        assert!((11..30).contains(&tokens));
    }

    #[test]
    fn test_test_model_inference_ollama_parse() {
        // Ollama nests the reply under message.content.
        let json = r#"{"message":{"role":"assistant","content":"{\"ok\": true}"}}"#;
        assert_eq!(
            parse_ollama_response_content(json).unwrap(),
            "{\"ok\": true}"
        );
    }

    #[test]
    fn test_test_model_inference_openai_parse() {
        // OpenAI nests the reply under choices[0].message.content.
        let json = r#"{"choices":[{"message":{"content":"{\"ok\": true}"}}]}"#;
        assert_eq!(
            parse_openai_response_content(json).unwrap(),
            "{\"ok\": true}"
        );
    }

    #[test]
    fn test_test_model_inference_empty_choices() {
        // No choices at all degrades to an empty string, not an error.
        let json = r#"{"choices":[]}"#;
        assert_eq!(parse_openai_response_content(json).unwrap(), "");
    }
}
// The inference implementation is split into files under `inference/`; the
// explicit `#[path]` attributes point each submodule at its source file.
#[path = "inference/request.rs"]
mod request;
#[path = "inference/response.rs"]
mod response;
#[path = "inference/run.rs"]
mod run;

// Re-export the public entry points at their original visibility so existing
// callers (two module levels up) keep the same paths after the split.
pub(in super::super) use run::{estimate_tokens, test_model_inference};
0 commit comments