test: update mock responses to use correct Violated Categories key

nac7 · nac7 · commit 2727e793eb73 · 2026-06-08T16:38:22.000-05:00
Existing tests supplied mock NemoGuard JSON responses that used the wrong key,
"Safety Categories". Now that the parser correctly reads "Violated Categories",
update all mock response fixtures to match what the real model emits.

The intentional regression tests in test_content_safety_output_parsers.py
that verify no data is extracted from a "Safety Categories" key any more are
left unchanged.
diff --git a/tests/guardrails/test_content_safety_iorails_actions.py b/tests/guardrails/test_content_safety_iorails_actions.py
@@ -43,15 +43,15 @@
 UNSAFE_JSON = json.dumps(
     {
         "User Safety": "unsafe",
-        "Safety Categories": "S1: Violence, S3: Criminal Planning/Confessions",
+        "Violated Categories": "S1: Violence, S3: Criminal Planning/Confessions",
     }
 )
 SAFE_OUTPUT_JSON = json.dumps({"User Safety": "safe", "Response Safety": "safe"})
 UNSAFE_OUTPUT_JSON = json.dumps(
     {
         "User Safety": "safe",
         "Response Safety": "unsafe",
-        "Safety Categories": "S17: Malware",
+        "Violated Categories": "S17: Malware",
     }
 )
 
diff --git a/tests/guardrails/test_data.py b/tests/guardrails/test_data.py
@@ -53,7 +53,7 @@
 <END CONVERSATION>
 
 Provide your safety assessment for the above conversation in the below output JSON format:
-{"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Safety Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.}
+{"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Violated Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.}
 
 Do not include anything other than the output JSON in your response.
 Output JSON:
@@ -97,7 +97,7 @@
 <END CONVERSATION>
 
 Provide your safety assessment for the above conversation in the below output JSON format:
-{"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Safety Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.}
+{"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Violated Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.}
 
 Do not include anything other than the output JSON in your response.
 Output JSON:
diff --git a/tests/guardrails/test_iorails_telemetry.py b/tests/guardrails/test_iorails_telemetry.py
@@ -372,7 +372,7 @@ async def capture_then_pass(messages):
 
 SAFE_INPUT_JSON = json.dumps({"User Safety": "safe"})
 SAFE_OUTPUT_JSON = json.dumps({"User Safety": "safe", "Response Safety": "safe"})
-UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Safety Categories": "S1: Violence"})
+UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Violated Categories": "S1: Violence"})
 
 
 def _stub_deep_pipeline(iorails, main_llm_response="Hello", input_safe=True):
diff --git a/tests/guardrails/test_rails_manager.py b/tests/guardrails/test_rails_manager.py
@@ -41,13 +41,13 @@
 )
 
 SAFE_INPUT_JSON = json.dumps({"User Safety": "safe"})
-UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Safety Categories": "S1: Violence"})
+UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Violated Categories": "S1: Violence"})
 SAFE_OUTPUT_JSON = json.dumps({"User Safety": "safe", "Response Safety": "safe"})
 UNSAFE_OUTPUT_JSON = json.dumps(
     {
         "User Safety": "safe",
         "Response Safety": "unsafe",
-        "Safety Categories": "S17: Malware",
+        "Violated Categories": "S17: Malware",
     }
 )
 MESSAGES = [{"role": "user", "content": "hello"}]
diff --git a/tests/test_content_safety_integration.py b/tests/test_content_safety_integration.py
@@ -161,7 +161,7 @@ async def test_content_safety_input_with_nemoguard_parser_unsafe_with_categories
         self,
     ):
         """Test input action with real nemoguard_parse_prompt_safety parser - unsafe with categories."""
-        json_response = '{"User Safety": "unsafe", "Safety Categories": "S1, S8, S10"}'
+        json_response = '{"User Safety": "unsafe", "Violated Categories": "S1, S8, S10"}'
         parsed_result = nemoguard_parse_prompt_safety(json_response)
         llms, mock_task_manager = _create_mock_setup([json_response], parsed_result)
         context = _create_input_context("Potentially harmful content")
@@ -181,7 +181,7 @@ async def test_content_safety_input_with_nemoguard_parser_unsafe_with_categories
         [
             ('{"Response Safety": "safe"}', True, []),
             (
-                '{"Response Safety": "unsafe", "Safety Categories": "Violence, Hate Speech"}',
+                '{"Response Safety": "unsafe", "Violated Categories": "Violence, Hate Speech"}',
                 False,
                 ["Violence", "Hate Speech"],
             ),
@@ -251,13 +251,13 @@ def test_iterable_unpacking_with_is_content_safe_outputs(self, response, expecte
         [
             ('{"User Safety": "safe"}', True, []),
             (
-                '{"User Safety": "unsafe", "Safety Categories": "S1, S8"}',
+                '{"User Safety": "unsafe", "Violated Categories": "S1, S8"}',
                 False,
                 ["S1", "S8"],
             ),
             ('{"Response Safety": "safe"}', True, []),
             (
-                '{"Response Safety": "unsafe", "Safety Categories": "Violence, Hate"}',
+                '{"Response Safety": "unsafe", "Violated Categories": "Violence, Hate"}',
                 False,
                 ["Violence", "Hate"],
             ),

Original file line number	Diff line number	Diff line change
`@@ -43,15 +43,15 @@`
`43`	`43`	`UNSAFE_JSON = json.dumps(`
`44`	`44`	`{`
`45`	`45`	`"User Safety": "unsafe",`
`46`		`- "Safety Categories": "S1: Violence, S3: Criminal Planning/Confessions",`
	`46`	`+ "Violated Categories": "S1: Violence, S3: Criminal Planning/Confessions",`
`47`	`47`	`}`
`48`	`48`	`)`
`49`	`49`	`SAFE_OUTPUT_JSON = json.dumps({"User Safety": "safe", "Response Safety": "safe"})`
`50`	`50`	`UNSAFE_OUTPUT_JSON = json.dumps(`
`51`	`51`	`{`
`52`	`52`	`"User Safety": "safe",`
`53`	`53`	`"Response Safety": "unsafe",`
`54`		`- "Safety Categories": "S17: Malware",`
	`54`	`+ "Violated Categories": "S17: Malware",`
`55`	`55`	`}`
`56`	`56`	`)`
`57`	`57`
Original file line number	Diff line number	Diff line change
`@@ -41,13 +41,13 @@`
`41`	`41`	`)`
`42`	`42`
`43`	`43`	`SAFE_INPUT_JSON = json.dumps({"User Safety": "safe"})`
`44`		`-UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Safety Categories": "S1: Violence"})`
	`44`	`+UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Violated Categories": "S1: Violence"})`
`45`	`45`	`SAFE_OUTPUT_JSON = json.dumps({"User Safety": "safe", "Response Safety": "safe"})`
`46`	`46`	`UNSAFE_OUTPUT_JSON = json.dumps(`
`47`	`47`	`{`
`48`	`48`	`"User Safety": "safe",`
`49`	`49`	`"Response Safety": "unsafe",`
`50`		`- "Safety Categories": "S17: Malware",`
	`50`	`+ "Violated Categories": "S17: Malware",`
`51`	`51`	`}`
`52`	`52`	`)`
`53`	`53`	`MESSAGES = [{"role": "user", "content": "hello"}]`