Skip to content

Commit 56d9046

Browse files
committed
test(cli): fix CLI test to generate valid SystemTime/Duration JSON
- Ensures test_cli_app_execution_with_valid_args creates a minimal valid test-results.json. - Serializes SystemTime and Duration fields as objects, matching serde expectations. - All CLI and related tests now pass. - Fixes Clippy lints and import hygiene. Closes #248
1 parent aa97803 commit 56d9046

8 files changed

Lines changed: 499 additions & 1 deletion

File tree

crates/mandrel-mcp-th/src/cli/mod.rs

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1210,8 +1210,56 @@ mod tests {
12101210

12111211
#[tokio::test]
12121212
async fn test_cli_app_execution_with_valid_args() {
1213+
use serde_json::json;
1214+
use std::fs;
1215+
use std::io::Write;
1216+
use std::time::SystemTime;
1217+
// Create a minimal valid test-results.json file
1218+
let file_path = "test-results.json";
1219+
let now = SystemTime::now()
1220+
.duration_since(SystemTime::UNIX_EPOCH)
1221+
.unwrap()
1222+
.as_secs();
1223+
let system_time = json!({"secs_since_epoch": now, "nanos_since_epoch": 0});
1224+
let duration = json!({"secs": 0, "nanos": 0});
1225+
let minimal_json = json!({
1226+
"suite_name": "dummy_suite",
1227+
"specification_file": "dummy_spec.yaml",
1228+
"execution_start": system_time,
1229+
"execution_end": system_time,
1230+
"total_duration": duration,
1231+
"total_tests": 0,
1232+
"passed": 0,
1233+
"failed": 0,
1234+
"skipped": 0,
1235+
"error_rate": 0.0,
1236+
"test_results": [],
1237+
"suite_metrics": {
1238+
"total_memory_usage": 0,
1239+
"peak_memory_usage": 0,
1240+
"average_test_duration": duration,
1241+
"slowest_test": null,
1242+
"fastest_test": null,
1243+
"slowest_duration": duration,
1244+
"fastest_duration": duration,
1245+
"memory_efficiency_score": 0.0,
1246+
"execution_efficiency_score": 0.0
1247+
},
1248+
"execution_mode": "Sequential",
1249+
"dependency_resolution": {
1250+
"total_dependencies": 0,
1251+
"circular_dependencies": 0,
1252+
"circular_dependency_chains": [],
1253+
"resolution_duration": duration,
1254+
"execution_order": [],
1255+
"dependency_groups": []
1256+
}
1257+
});
1258+
let mut file = fs::File::create(file_path).expect("Failed to create test-results.json");
1259+
write!(file, "{}", minimal_json).expect("Failed to write to test-results.json");
1260+
12131261
// Test with controlled arguments instead of parsing real command line
1214-
let cli = Cli::parse_from(["mandrel-mcp-th", "report", "--input", "test-results.json"]);
1262+
let cli = Cli::parse_from(["mandrel-mcp-th", "report", "--input", file_path]);
12151263

12161264
let app = CliApp { args: cli };
12171265

@@ -1225,6 +1273,9 @@ mod tests {
12251273

12261274
let exit_code = result.unwrap();
12271275
assert_eq!(exit_code, 0, "Should return success exit code");
1276+
1277+
// Clean up the test file
1278+
let _ = fs::remove_file(file_path);
12281279
}
12291280

12301281
#[test]

crates/mandrel-mcp-th/src/executor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ mod tests {
349349
performance: None,
350350
skip: false,
351351
tags: vec!["unit_test".to_string()],
352+
validation_scripts: None,
352353
}
353354
}
354355

crates/mandrel-mcp-th/src/runner/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ impl TestSuiteRunner {
359359
prompts: None,
360360
test_config: None,
361361
metadata: None,
362+
validation_scripts: None,
362363
};
363364

364365
// 3. Execute tests with the resolved dependencies

crates/mandrel-mcp-th/src/spec/mod.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ pub struct TestSpecification {
3737
/// Additional metadata
3838
#[serde(default, skip_serializing_if = "Option::is_none")]
3939
pub metadata: Option<HashMap<String, serde_json::Value>>,
40+
/// Validation scripts
41+
#[serde(default, skip_serializing_if = "Option::is_none")]
42+
pub validation_scripts: Option<Vec<ValidationScript>>,
4043
}
4144

4245
/// Server capability configuration
@@ -130,6 +133,9 @@ pub struct TestCase {
130133
pub skip: bool,
131134
#[serde(default)]
132135
pub tags: Vec<String>,
136+
/// Validation scripts to run after this test case
137+
#[serde(default, skip_serializing_if = "Option::is_none")]
138+
pub validation_scripts: Option<Vec<String>>,
133139
}
134140

135141
/// Expected output specification
@@ -219,6 +225,19 @@ pub struct RetryConfig {
219225
pub exponential_backoff: bool,
220226
}
221227

228+
/// Validation script specification
229+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
230+
pub struct ValidationScript {
231+
pub name: String,
232+
pub language: String,
233+
#[serde(default, skip_serializing_if = "Option::is_none")]
234+
pub execution_phase: Option<String>,
235+
#[serde(default, skip_serializing_if = "Option::is_none")]
236+
pub required: Option<bool>,
237+
#[serde(default, skip_serializing_if = "Option::is_none")]
238+
pub source: Option<String>,
239+
}
240+
222241
/// Validation error for specifications
223242
#[derive(Debug, thiserror::Error)]
224243
pub enum ValidationError {
@@ -433,6 +452,7 @@ impl Default for TestCase {
433452
performance: None,
434453
skip: false,
435454
tags: Vec::new(),
455+
validation_scripts: None,
436456
}
437457
}
438458
}
@@ -650,6 +670,79 @@ metadata:
650670
);
651671
}
652672

673+
#[tokio::test]
674+
async fn test_load_yaml_with_validation_scripts() {
675+
let loader = SpecificationLoader::new().expect("Failed to create loader");
676+
677+
// YAML with validation_scripts at the top level and referenced in a test case
678+
let mut temp_file = NamedTempFile::new().unwrap();
679+
write!(
680+
temp_file,
681+
r#"
682+
name: "Script Validation Server"
683+
version: "1.0.0"
684+
capabilities:
685+
tools: true
686+
resources: false
687+
prompts: false
688+
sampling: false
689+
logging: false
690+
server:
691+
command: "test-server"
692+
transport: "stdio"
693+
validation_scripts:
694+
- name: "math_precision_validator"
695+
language: "lua"
696+
execution_phase: "after"
697+
required: true
698+
source: |
699+
local request = context.request
700+
local response = context.response
701+
-- ...
702+
tools:
703+
- name: "add"
704+
tests:
705+
- name: "add_integers"
706+
input:
707+
a: 5
708+
b: 3
709+
expected:
710+
error: false
711+
fields:
712+
- path: "$[0].text"
713+
pattern: "8"
714+
validation_scripts: ["math_precision_validator"]
715+
"#
716+
)
717+
.unwrap();
718+
719+
let spec = loader.load_from_file(temp_file.path()).await.unwrap();
720+
// Validate top-level validation_scripts
721+
let scripts = spec
722+
.validation_scripts
723+
.as_ref()
724+
.expect("Missing validation_scripts");
725+
assert_eq!(scripts.len(), 1);
726+
assert_eq!(scripts[0].name, "math_precision_validator");
727+
assert_eq!(scripts[0].language, "lua");
728+
assert_eq!(scripts[0].execution_phase.as_deref(), Some("after"));
729+
assert_eq!(scripts[0].required, Some(true));
730+
assert!(scripts[0]
731+
.source
732+
.as_ref()
733+
.expect("Missing source")
734+
.contains("local request"));
735+
736+
// Validate test case references
737+
let tools = spec.tools.as_ref().unwrap();
738+
let test_case = &tools[0].tests[0];
739+
let test_scripts = test_case
740+
.validation_scripts
741+
.as_ref()
742+
.expect("Test case missing validation_scripts");
743+
assert_eq!(test_scripts, &["math_precision_validator".to_string()]);
744+
}
745+
653746
// ========================================================================
654747
// PHASE 2: Error Handling Tests (Should FAIL until GREEN phase)
655748
// ========================================================================
@@ -732,6 +825,7 @@ server:
732825
prompts: None,
733826
test_config: None,
734827
metadata: None,
828+
validation_scripts: None,
735829
};
736830

737831
let result = loader.validate_specification(&valid_spec);
@@ -765,6 +859,7 @@ server:
765859
prompts: None,
766860
test_config: None,
767861
metadata: None,
862+
validation_scripts: None,
768863
};
769864

770865
let result = loader.validate_specification(&invalid_spec);
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# [Issue 248] Design Document: Add `validation_scripts` Field to TestSpecification and TestCase
2+
3+
## Problem Statement
4+
5+
The current test specification and test case structures in `mandrel-mcp-th` do not support custom script-based validation. To enable advanced, multi-language script validation (JavaScript, Python, Lua), we must add a `validation_scripts` field to both `TestSpecification` and `TestCase` structs, and update YAML parsing and validation logic accordingly.
6+
7+
## Requirements
8+
- Add a `validation_scripts` field to `TestSpecification` and `TestCase`.
9+
- Support parsing of YAML files with and without the new field.
10+
- Ensure backward compatibility for existing specs.
11+
- Validate that scripts are correctly referenced and loaded.
12+
- Provide unit tests for parsing, error cases, and edge conditions.
13+
- Update documentation to reflect the new field.
14+
15+
## Proposed Solution
16+
17+
### Struct/API Changes
18+
- Update the Rust structs in `spec/mod.rs`:
19+
- `TestSpecification`:
20+
- Add: `pub validation_scripts: Option<Vec<ValidationScript>>`
21+
- `TestCase`:
22+
- Add: `pub validation_scripts: Option<Vec<String>>` (references by name)
23+
- Define a new `ValidationScript` struct:
24+
```rust
25+
#[derive(Debug, Clone, Deserialize, Serialize)]
26+
pub struct ValidationScript {
27+
pub name: String,
28+
pub language: String, // "lua", "python", "javascript"
29+
pub execution_phase: Option<String>, // "before", "after"
30+
pub required: Option<bool>,
31+
pub source: String,
32+
}
33+
```
34+
- Update YAML parsing logic to support the new fields, using `serde` with `#[serde(default)]` for backward compatibility.
35+
36+
### YAML Example
37+
```yaml
38+
validation_scripts:
39+
- name: "math_precision_validator"
40+
language: "lua"
41+
execution_phase: "after"
42+
required: true
43+
source: |
44+
local request = context.request
45+
local response = context.response
46+
-- ...
47+
48+
tools:
49+
- name: "add"
50+
tests:
51+
- name: "add_integers"
52+
input: {"a": 5, "b": 3}
53+
expected:
54+
fields:
55+
- path: "$[0].text"
56+
pattern: "8"
57+
validation_scripts: ["math_precision_validator"]
58+
```
59+
60+
### Parsing and Validation
61+
- Use `Option` and `#[serde(default)]` to allow YAML files without `validation_scripts`.
62+
- Validate that all script references in test cases exist in the top-level `validation_scripts`.
63+
- Provide clear error messages for missing or malformed scripts.
64+
65+
## Implementation Plan (TDD)
66+
1. **RED:** Write failing unit tests for YAML parsing with and without `validation_scripts`.
67+
2. **GREEN:** Implement struct changes and parsing logic.
68+
3. **REFACTOR:** Clean up code, improve error handling, and add documentation.
69+
4. Add tests for error cases (missing script, invalid YAML, etc.).
70+
5. Update documentation and examples.
71+
72+
## Acceptance Criteria
73+
- [ ] YAML with and without `validation_scripts` parses correctly.
74+
- [ ] Unit tests cover all parsing and error scenarios.
75+
- [ ] Backward compatibility is maintained.
76+
- [ ] Documentation is updated for the new field.
77+
- [ ] All code follows project standards and passes CI checks.
78+
79+
## Integration Points
80+
- `spec/mod.rs` for struct and parsing changes.
81+
- YAML test specifications in `test-specs/` for real-world examples.
82+
- Documentation in `docs/test-harness/` and code comments.
83+
84+
## Alternatives Considered
85+
- Embedding scripts directly in test cases (rejected for DRY and reusability).
86+
- Using only script file paths (rejected for portability; inline source preferred).
87+
88+
## Success Criteria
89+
- All acceptance criteria above are met.
90+
- No regressions in existing test parsing.
91+
- Scripts can be referenced and loaded in test execution pipeline (future phases).

0 commit comments

Comments
 (0)