 use crate::adapters::common;
 use crate::adapters::llm::{
     ChatRequest, ChatResponse, ChatRole, ContentBlock, LLMAdapter, LLMRequest, LLMResponse,
-    ModelConfig, StopReason, Usage,
+    ModelConfig, StopReason, StructuredOutputSchema, Usage,
 };
 use anyhow::{Context, Result};
 use async_trait::async_trait;
@@ -22,6 +22,8 @@ struct OpenAIRequest {
     messages: Vec<Message>,
     temperature: f32,
     max_tokens: usize,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    response_format: Option<OpenAIResponseFormat>,
 }

 #[derive(Serialize)]
@@ -45,6 +47,20 @@ struct Message {
     content: String,
 }

+#[derive(Serialize)]
+struct OpenAIResponseFormat {
+    #[serde(rename = "type")]
+    format_type: String,
+    json_schema: OpenAIJsonSchemaFormat,
+}
+
+#[derive(Serialize)]
+struct OpenAIJsonSchemaFormat {
+    name: String,
+    schema: serde_json::Value,
+    strict: bool,
+}
+
 #[derive(Deserialize)]
 struct OpenAIResponse {
     choices: Vec<Choice>,
@@ -221,7 +237,15 @@ impl OpenAIAdapter {

 #[async_trait]
 impl LLMAdapter for OpenAIAdapter {
-    async fn complete(&self, request: LLMRequest) -> Result<LLMResponse> {
+    async fn complete(&self, mut request: LLMRequest) -> Result<LLMResponse> {
+        if request.response_schema.is_some() {
+            if self.supports_native_response_schema() {
+                return self.complete_chat_completions(request).await;
+            }
+
+            request.response_schema = None;
+        }
+
         if should_use_responses_api(&self.config) {
             return self.complete_responses(request).await;
         }
@@ -482,6 +506,12 @@ fn should_use_responses_api(config: &ModelConfig) -> bool {
 }

 impl OpenAIAdapter {
+    fn supports_native_response_schema(&self) -> bool {
+        self.base_url.contains("api.openai.com")
+            || self.base_url.contains("127.0.0.1")
+            || self.base_url.contains("localhost")
+    }
+
     async fn complete_chat_completions(&self, request: LLMRequest) -> Result<LLMResponse> {
         let messages = vec![
             Message {
@@ -499,6 +529,10 @@ impl OpenAIAdapter {
             messages,
             temperature: request.temperature.unwrap_or(self.config.temperature),
             max_tokens: request.max_tokens.unwrap_or(self.config.max_tokens),
+            response_format: request
+                .response_schema
+                .as_ref()
+                .map(to_openai_response_format),
         };

         let url = format!("{}/chat/completions", self.base_url);
@@ -576,6 +610,17 @@ impl OpenAIAdapter {
     }
 }

+fn to_openai_response_format(schema: &StructuredOutputSchema) -> OpenAIResponseFormat {
+    OpenAIResponseFormat {
+        format_type: "json_schema".to_string(),
+        json_schema: OpenAIJsonSchemaFormat {
+            name: schema.name.clone(),
+            schema: schema.schema.clone(),
+            strict: schema.strict,
+        },
+    }
+}
+
 fn extract_response_text(response: &OpenAIResponsesResponse) -> String {
     let mut combined = String::new();

@@ -603,8 +648,9 @@ mod tests {
     use super::*;
     use crate::adapters::llm::{
         ChatMessage, ChatRequest, ContentBlock as CB, LLMAdapter, LLMRequest, ModelConfig,
-        StopReason, ToolDefinition,
+        StopReason, StructuredOutputSchema, ToolDefinition,
     };
+    use mockito::Matcher;

     fn test_config(base_url: &str) -> ModelConfig {
         ModelConfig {
@@ -625,9 +671,57 @@ mod tests {
             user_prompt: "user".to_string(),
             temperature: None,
             max_tokens: None,
+            response_schema: None,
         }
     }

+    #[tokio::test]
+    async fn test_structured_output_schema_uses_chat_response_format() {
+        let mut server = mockito::Server::new_async().await;
+        let mock = server
+            .mock("POST", "/chat/completions")
+            .match_body(Matcher::PartialJsonString(
+                serde_json::json!({
+                    "response_format": {
+                        "type": "json_schema",
+                        "json_schema": {
+                            "name": "review_findings",
+                            "strict": true
+                        }
+                    }
+                })
+                .to_string(),
+            ))
+            .with_status(200)
+            .with_header("content-type", "application/json")
+            .with_body(
+                r#"{
+                    "choices": [{"message": {"role": "assistant", "content": "[]"}}],
+                    "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
+                    "model": "gpt-4o"
+                }"#,
+            )
+            .create_async()
+            .await;
+
+        let adapter = OpenAIAdapter::new(test_config(&server.url())).unwrap();
+        let result = adapter
+            .complete(LLMRequest {
+                system_prompt: "system".to_string(),
+                user_prompt: "user".to_string(),
+                temperature: None,
+                max_tokens: None,
+                response_schema: Some(StructuredOutputSchema::json_schema(
+                    "review_findings",
+                    serde_json::json!({ "type": "array" }),
+                )),
+            })
+            .await;
+
+        assert!(result.is_ok());
+        mock.assert_async().await;
+    }
+
     #[tokio::test]
     async fn test_successful_completion() {
         let mut server = mockito::Server::new_async().await;
@@ -1103,6 +1197,7 @@ mod tests {
             user_prompt: "u".to_string(),
             temperature: Some(0.8),
             max_tokens: Some(500),
+            response_schema: None,
         })
         .await;

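For orientation, a minimal sketch (not part of the diff) of how a caller could opt into structured output through the new `response_schema` field. The function name `review_with_schema` is hypothetical; the request values and the `StructuredOutputSchema::json_schema` constructor mirror the test added above, and the schema body is a placeholder.

// Illustrative only: request shape and constructor taken from the test above.
async fn review_with_schema(adapter: &OpenAIAdapter) -> anyhow::Result<LLMResponse> {
    adapter
        .complete(LLMRequest {
            system_prompt: "system".to_string(),
            user_prompt: "user".to_string(),
            temperature: None,
            max_tokens: None,
            // Sent as a native `response_format` only when the base URL looks
            // like api.openai.com or a local endpoint; otherwise the adapter
            // drops the schema and falls back to a plain chat completion.
            response_schema: Some(StructuredOutputSchema::json_schema(
                "review_findings",
                serde_json::json!({ "type": "array" }),
            )),
        })
        .await
}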