Skip to content

Commit 56d9046

Browse files
committed
test(cli): fix CLI test to generate valid SystemTime/Duration JSON
- Ensures test_cli_app_execution_with_valid_args creates a minimal valid test-results.json. - Serializes SystemTime and Duration fields as objects, matching serde expectations. - All CLI and related tests now pass. - Fixes Clippy lints and import hygiene. Closes #248
1 parent aa97803 commit 56d9046

8 files changed

Lines changed: 499 additions & 1 deletion

File tree

crates/mandrel-mcp-th/src/cli/mod.rs

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1210,8 +1210,56 @@ mod tests {
12101210

12111211
#[tokio::test]
12121212
async fn test_cli_app_execution_with_valid_args() {
1213+
use serde_json::json;
1214+
use std::fs;
1215+
use std::io::Write;
1216+
use std::time::SystemTime;
1217+
// Create a minimal valid test-results.json file
1218+
let file_path = "test-results.json";
1219+
let now = SystemTime::now()
1220+
.duration_since(SystemTime::UNIX_EPOCH)
1221+
.unwrap()
1222+
.as_secs();
1223+
let system_time = json!({"secs_since_epoch": now, "nanos_since_epoch": 0});
1224+
let duration = json!({"secs": 0, "nanos": 0});
1225+
let minimal_json = json!({
1226+
"suite_name": "dummy_suite",
1227+
"specification_file": "dummy_spec.yaml",
1228+
"execution_start": system_time,
1229+
"execution_end": system_time,
1230+
"total_duration": duration,
1231+
"total_tests": 0,
1232+
"passed": 0,
1233+
"failed": 0,
1234+
"skipped": 0,
1235+
"error_rate": 0.0,
1236+
"test_results": [],
1237+
"suite_metrics": {
1238+
"total_memory_usage": 0,
1239+
"peak_memory_usage": 0,
1240+
"average_test_duration": duration,
1241+
"slowest_test": null,
1242+
"fastest_test": null,
1243+
"slowest_duration": duration,
1244+
"fastest_duration": duration,
1245+
"memory_efficiency_score": 0.0,
1246+
"execution_efficiency_score": 0.0
1247+
},
1248+
"execution_mode": "Sequential",
1249+
"dependency_resolution": {
1250+
"total_dependencies": 0,
1251+
"circular_dependencies": 0,
1252+
"circular_dependency_chains": [],
1253+
"resolution_duration": duration,
1254+
"execution_order": [],
1255+
"dependency_groups": []
1256+
}
1257+
});
1258+
let mut file = fs::File::create(file_path).expect("Failed to create test-results.json");
1259+
write!(file, "{}", minimal_json).expect("Failed to write to test-results.json");
1260+
12131261
// Test with controlled arguments instead of parsing real command line
1214-
let cli = Cli::parse_from(["mandrel-mcp-th", "report", "--input", "test-results.json"]);
1262+
let cli = Cli::parse_from(["mandrel-mcp-th", "report", "--input", file_path]);
12151263

12161264
let app = CliApp { args: cli };
12171265

@@ -1225,6 +1273,9 @@ mod tests {
12251273

12261274
let exit_code = result.unwrap();
12271275
assert_eq!(exit_code, 0, "Should return success exit code");
1276+
1277+
// Clean up the test file
1278+
let _ = fs::remove_file(file_path);
12281279
}
12291280

12301281
#[test]

crates/mandrel-mcp-th/src/executor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ mod tests {
349349
performance: None,
350350
skip: false,
351351
tags: vec!["unit_test".to_string()],
352+
validation_scripts: None,
352353
}
353354
}
354355

crates/mandrel-mcp-th/src/runner/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ impl TestSuiteRunner {
359359
prompts: None,
360360
test_config: None,
361361
metadata: None,
362+
validation_scripts: None,
362363
};
363364

364365
// 3. Execute tests with the resolved dependencies

crates/mandrel-mcp-th/src/spec/mod.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ pub struct TestSpecification {
3737
/// Additional metadata
3838
#[serde(default, skip_serializing_if = "Option::is_none")]
3939
pub metadata: Option<HashMap<String, serde_json::Value>>,
40+
/// Validation scripts
41+
#[serde(default, skip_serializing_if = "Option::is_none")]
42+
pub validation_scripts: Option<Vec<ValidationScript>>,
4043
}
4144

4245
/// Server capability configuration
@@ -130,6 +133,9 @@ pub struct TestCase {
130133
pub skip: bool,
131134
#[serde(default)]
132135
pub tags: Vec<String>,
136+
/// Validation scripts to run after this test case
137+
#[serde(default, skip_serializing_if = "Option::is_none")]
138+
pub validation_scripts: Option<Vec<String>>,
133139
}
134140

135141
/// Expected output specification
@@ -219,6 +225,19 @@ pub struct RetryConfig {
219225
pub exponential_backoff: bool,
220226
}
221227

228+
/// Validation script specification
229+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
230+
pub struct ValidationScript {
231+
pub name: String,
232+
pub language: String,
233+
#[serde(default, skip_serializing_if = "Option::is_none")]
234+
pub execution_phase: Option<String>,
235+
#[serde(default, skip_serializing_if = "Option::is_none")]
236+
pub required: Option<bool>,
237+
#[serde(default, skip_serializing_if = "Option::is_none")]
238+
pub source: Option<String>,
239+
}
240+
222241
/// Validation error for specifications
223242
#[derive(Debug, thiserror::Error)]
224243
pub enum ValidationError {
@@ -433,6 +452,7 @@ impl Default for TestCase {
433452
performance: None,
434453
skip: false,
435454
tags: Vec::new(),
455+
validation_scripts: None,
436456
}
437457
}
438458
}
@@ -650,6 +670,79 @@ metadata:
650670
);
651671
}
652672

673+
#[tokio::test]
674+
async fn test_load_yaml_with_validation_scripts() {
675+
let loader = SpecificationLoader::new().expect("Failed to create loader");
676+
677+
// YAML with validation_scripts at the top level and referenced in a test case
678+
let mut temp_file = NamedTempFile::new().unwrap();
679+
write!(
680+
temp_file,
681+
r#"
682+
name: "Script Validation Server"
683+
version: "1.0.0"
684+
capabilities:
685+
tools: true
686+
resources: false
687+
prompts: false
688+
sampling: false
689+
logging: false
690+
server:
691+
command: "test-server"
692+
transport: "stdio"
693+
validation_scripts:
694+
- name: "math_precision_validator"
695+
language: "lua"
696+
execution_phase: "after"
697+
required: true
698+
source: |
699+
local request = context.request
700+
local response = context.response
701+
-- ...
702+
tools:
703+
- name: "add"
704+
tests:
705+
- name: "add_integers"
706+
input:
707+
a: 5
708+
b: 3
709+
expected:
710+
error: false
711+
fields:
712+
- path: "$[0].text"
713+
pattern: "8"
714+
validation_scripts: ["math_precision_validator"]
715+
"#
716+
)
717+
.unwrap();
718+
719+
let spec = loader.load_from_file(temp_file.path()).await.unwrap();
720+
// Validate top-level validation_scripts
721+
let scripts = spec
722+
.validation_scripts
723+
.as_ref()
724+
.expect("Missing validation_scripts");
725+
assert_eq!(scripts.len(), 1);
726+
assert_eq!(scripts[0].name, "math_precision_validator");
727+
assert_eq!(scripts[0].language, "lua");
728+
assert_eq!(scripts[0].execution_phase.as_deref(), Some("after"));
729+
assert_eq!(scripts[0].required, Some(true));
730+
assert!(scripts[0]
731+
.source
732+
.as_ref()
733+
.expect("Missing source")
734+
.contains("local request"));
735+
736+
// Validate test case references
737+
let tools = spec.tools.as_ref().unwrap();
738+
let test_case = &tools[0].tests[0];
739+
let test_scripts = test_case
740+
.validation_scripts
741+
.as_ref()
742+
.expect("Test case missing validation_scripts");
743+
assert_eq!(test_scripts, &["math_precision_validator".to_string()]);
744+
}
745+
653746
// ========================================================================
654747
// PHASE 2: Error Handling Tests (Should FAIL until GREEN phase)
655748
// ========================================================================
@@ -732,6 +825,7 @@ server:
732825
prompts: None,
733826
test_config: None,
734827
metadata: None,
828+
validation_scripts: None,
735829
};
736830

737831
let result = loader.validate_specification(&valid_spec);
@@ -765,6 +859,7 @@ server:
765859
prompts: None,
766860
test_config: None,
767861
metadata: None,
862+
validation_scripts: None,
768863
};
769864

770865
let result = loader.validate_specification(&invalid_spec);
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# [Issue 248] Design Document: Add `validation_scripts` Field to TestSpecification and TestCase
2+
3+
## Problem Statement
4+
5+
The current test specification and test case structures in `mandrel-mcp-th` do not support custom script-based validation. To enable advanced, multi-language script validation (JavaScript, Python, Lua), we must add a `validation_scripts` field to both `TestSpecification` and `TestCase` structs, and update YAML parsing and validation logic accordingly.
6+
7+
## Requirements
8+
- Add a `validation_scripts` field to `TestSpecification` and `TestCase`.
9+
- Support parsing of YAML files with and without the new field.
10+
- Ensure backward compatibility for existing specs.
11+
- Validate that scripts are correctly referenced and loaded.
12+
- Provide unit tests for parsing, error cases, and edge conditions.
13+
- Update documentation to reflect the new field.
14+
15+
## Proposed Solution
16+
17+
### Struct/API Changes
18+
- Update the Rust structs in `spec/mod.rs`:
19+
- `TestSpecification`:
20+
- Add: `pub validation_scripts: Option<Vec<ValidationScript>>`
21+
- `TestCase`:
22+
- Add: `pub validation_scripts: Option<Vec<String>>` (references by name)
23+
- Define a new `ValidationScript` struct:
24+
```rust
25+
#[derive(Debug, Clone, Deserialize, Serialize)]
26+
pub struct ValidationScript {
27+
pub name: String,
28+
pub language: String, // "lua", "python", "javascript"
29+
pub execution_phase: Option<String>, // "before", "after"
30+
pub required: Option<bool>,
31+
pub source: String,
32+
}
33+
```
34+
- Update YAML parsing logic to support the new fields, using `serde` with `#[serde(default)]` for backward compatibility.
35+
36+
### YAML Example
37+
```yaml
38+
validation_scripts:
39+
- name: "math_precision_validator"
40+
language: "lua"
41+
execution_phase: "after"
42+
required: true
43+
source: |
44+
local request = context.request
45+
local response = context.response
46+
-- ...
47+
48+
tools:
49+
- name: "add"
50+
tests:
51+
- name: "add_integers"
52+
input: {"a": 5, "b": 3}
53+
expected:
54+
fields:
55+
- path: "$[0].text"
56+
pattern: "8"
57+
validation_scripts: ["math_precision_validator"]
58+
```
59+
60+
### Parsing and Validation
61+
- Use `Option` and `#[serde(default)]` to allow YAML files without `validation_scripts`.
62+
- Validate that all script references in test cases exist in the top-level `validation_scripts`.
63+
- Provide clear error messages for missing or malformed scripts.
64+
65+
## Implementation Plan (TDD)
66+
1. **RED:** Write failing unit tests for YAML parsing with and without `validation_scripts`.
67+
2. **GREEN:** Implement struct changes and parsing logic.
68+
3. **REFACTOR:** Clean up code, improve error handling, and add documentation.
69+
4. Add tests for error cases (missing script, invalid YAML, etc.).
70+
5. Update documentation and examples.
71+
72+
## Acceptance Criteria
73+
- [ ] YAML with and without `validation_scripts` parses correctly.
74+
- [ ] Unit tests cover all parsing and error scenarios.
75+
- [ ] Backward compatibility is maintained.
76+
- [ ] Documentation is updated for the new field.
77+
- [ ] All code follows project standards and passes CI checks.
78+
79+
## Integration Points
80+
- `spec/mod.rs` for struct and parsing changes.
81+
- YAML test specifications in `test-specs/` for real-world examples.
82+
- Documentation in `docs/test-harness/` and code comments.
83+
84+
## Alternatives Considered
85+
- Embedding scripts directly in test cases (rejected for DRY and reusability).
86+
- Using only script file paths (rejected for portability; inline source preferred).
87+
88+
## Success Criteria
89+
- All acceptance criteria above are met.
90+
- No regressions in existing test parsing.
91+
- Scripts can be referenced and loaded in test execution pipeline (future phases).

0 commit comments

Comments
 (0)