NVIDIA-NeMo · nac7 · Jun 8, 2026 · Jun 8, 2026
diff --git a/nemoguardrails/llm/output_parsers.py b/nemoguardrails/llm/output_parsers.py
@@ -160,8 +160,8 @@ def nemoguard_parse_prompt_safety(response: str) -> Sequence[Union[bool, str]]:
         parsed_json_result = json.loads(response)
         assert "User Safety" in parsed_json_result
         result = parsed_json_result["User Safety"].lower()
-        if "Safety Categories" in parsed_json_result:
-            safety_categories = [cat.strip() for cat in parsed_json_result["Safety Categories"].split(",")]
+        if "Violated Categories" in parsed_json_result:
+            safety_categories = [cat.strip() for cat in parsed_json_result["Violated Categories"].split(",")]
         else:
             safety_categories = []
     except Exception:
@@ -199,8 +199,8 @@ def nemoguard_parse_response_safety(response: str) -> Sequence[Union[bool, str]]
         parsed_json_result = json.loads(response)
         assert "Response Safety" in parsed_json_result
         result = parsed_json_result["Response Safety"].lower()
-        if "Safety Categories" in parsed_json_result:
-            safety_categories = [cat.strip() for cat in parsed_json_result["Safety Categories"].split(",")]
+        if "Violated Categories" in parsed_json_result:
+            safety_categories = [cat.strip() for cat in parsed_json_result["Violated Categories"].split(",")]
         else:
             safety_categories = []
     except Exception:

diff --git a/tests/guardrails/test_content_safety_iorails_actions.py b/tests/guardrails/test_content_safety_iorails_actions.py
@@ -43,15 +43,15 @@
 UNSAFE_JSON = json.dumps(
     {
         "User Safety": "unsafe",
-        "Safety Categories": "S1: Violence, S3: Criminal Planning/Confessions",
+        "Violated Categories": "S1: Violence, S3: Criminal Planning/Confessions",
     }
 )
 SAFE_OUTPUT_JSON = json.dumps({"User Safety": "safe", "Response Safety": "safe"})
 UNSAFE_OUTPUT_JSON = json.dumps(
     {
         "User Safety": "safe",
         "Response Safety": "unsafe",
-        "Safety Categories": "S17: Malware",
+        "Violated Categories": "S17: Malware",
     }
 )
 

diff --git a/tests/guardrails/test_data.py b/tests/guardrails/test_data.py
@@ -53,7 +53,7 @@
 <END CONVERSATION>
 
 Provide your safety assessment for the above conversation in the below output JSON format:
-{"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Safety Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.}
+{"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Violated Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.}
 
 Do not include anything other than the output JSON in your response.
 Output JSON:
@@ -97,7 +97,7 @@
 <END CONVERSATION>
 
 Provide your safety assessment for the above conversation in the below output JSON format:
-{"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Safety Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.}
+{"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Violated Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.}
 
 Do not include anything other than the output JSON in your response.
 Output JSON:

diff --git a/tests/guardrails/test_iorails_telemetry.py b/tests/guardrails/test_iorails_telemetry.py
@@ -372,7 +372,7 @@ async def capture_then_pass(messages):
 
 SAFE_INPUT_JSON = json.dumps({"User Safety": "safe"})
 SAFE_OUTPUT_JSON = json.dumps({"User Safety": "safe", "Response Safety": "safe"})
-UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Safety Categories": "S1: Violence"})
+UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Violated Categories": "S1: Violence"})
 
 
 def _stub_deep_pipeline(iorails, main_llm_response="Hello", input_safe=True):

diff --git a/tests/guardrails/test_rails_manager.py b/tests/guardrails/test_rails_manager.py
@@ -41,13 +41,13 @@
 )
 
 SAFE_INPUT_JSON = json.dumps({"User Safety": "safe"})
-UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Safety Categories": "S1: Violence"})
+UNSAFE_INPUT_JSON = json.dumps({"User Safety": "unsafe", "Violated Categories": "S1: Violence"})
 SAFE_OUTPUT_JSON = json.dumps({"User Safety": "safe", "Response Safety": "safe"})
 UNSAFE_OUTPUT_JSON = json.dumps(
     {
         "User Safety": "safe",
         "Response Safety": "unsafe",
-        "Safety Categories": "S17: Malware",
+        "Violated Categories": "S17: Malware",
     }
 )
 MESSAGES = [{"role": "user", "content": "hello"}]

diff --git a/tests/test_content_safety_integration.py b/tests/test_content_safety_integration.py
@@ -161,7 +161,7 @@ async def test_content_safety_input_with_nemoguard_parser_unsafe_with_categories
         self,
     ):
         """Test input action with real nemoguard_parse_prompt_safety parser - unsafe with categories."""
-        json_response = '{"User Safety": "unsafe", "Safety Categories": "S1, S8, S10"}'
+        json_response = '{"User Safety": "unsafe", "Violated Categories": "S1, S8, S10"}'
         parsed_result = nemoguard_parse_prompt_safety(json_response)
         llms, mock_task_manager = _create_mock_setup([json_response], parsed_result)
         context = _create_input_context("Potentially harmful content")
@@ -181,7 +181,7 @@ async def test_content_safety_input_with_nemoguard_parser_unsafe_with_categories
         [
             ('{"Response Safety": "safe"}', True, []),
             (
-                '{"Response Safety": "unsafe", "Safety Categories": "Violence, Hate Speech"}',
+                '{"Response Safety": "unsafe", "Violated Categories": "Violence, Hate Speech"}',
                 False,
                 ["Violence", "Hate Speech"],
             ),
@@ -251,13 +251,13 @@ def test_iterable_unpacking_with_is_content_safe_outputs(self, response, expecte
         [
             ('{"User Safety": "safe"}', True, []),
             (
-                '{"User Safety": "unsafe", "Safety Categories": "S1, S8"}',
+                '{"User Safety": "unsafe", "Violated Categories": "S1, S8"}',
                 False,
                 ["S1", "S8"],
             ),
             ('{"Response Safety": "safe"}', True, []),
             (
-                '{"Response Safety": "unsafe", "Safety Categories": "Violence, Hate"}',
+                '{"Response Safety": "unsafe", "Violated Categories": "Violence, Hate"}',
                 False,
                 ["Violence", "Hate"],
             ),

diff --git a/tests/test_content_safety_output_parsers.py b/tests/test_content_safety_output_parsers.py
@@ -133,15 +133,22 @@ def test_unsafe_user_content_json(self):
         assert is_safe is False
         assert violated_policies == []
 
-    def test_unsafe_with_safety_categories(self):
-        """Test parsing unsafe content with safety categories."""
-        response = '{"User Safety": "unsafe", "Safety Categories": "S1, S8, S10"}'
+    def test_unsafe_with_violated_categories(self):
+        """Test parsing unsafe content with violation categories (correct NemoGuard key)."""
+        response = '{"User Safety": "unsafe", "Violated Categories": "S1, S8, S10"}'
         is_safe, *violated_policies = nemoguard_parse_prompt_safety(response)
         assert is_safe is False
         assert "S1" in violated_policies
         assert "S8" in violated_policies
         assert "S10" in violated_policies
 
+    def test_wrong_key_safety_categories_yields_no_categories(self):
+        """Regression: old wrong key 'Safety Categories' should not extract categories."""
+        response = '{"User Safety": "unsafe", "Safety Categories": "S1, S8"}'
+        is_safe, *violated_policies = nemoguard_parse_prompt_safety(response)
+        assert is_safe is False
+        assert violated_policies == []
+
     def test_case_insensitive_safety_status(self):
         """Test parsing is case insensitive for safety status."""
         response = '{"User Safety": "SAFE"}'
@@ -151,16 +158,16 @@ def test_case_insensitive_safety_status(self):
 
     def test_categories_with_whitespace_trimming(self):
         """Test parsing categories with extra whitespace gets trimmed."""
-        response = '{"User Safety": "unsafe", "Safety Categories": " S1 , S8 , S10 "}'
+        response = '{"User Safety": "unsafe", "Violated Categories": " S1 , S8 , S10 "}'
         is_safe, *violated_policies = nemoguard_parse_prompt_safety(response)
         assert is_safe is False
         assert "S1" in violated_policies
         assert "S8" in violated_policies
         assert "S10" in violated_policies
 
-    def test_empty_safety_categories(self):
-        """Test parsing with empty safety categories string."""
-        response = '{"User Safety": "unsafe", "Safety Categories": ""}'
+    def test_empty_violated_categories(self):
+        """Test parsing with empty violated categories string."""
+        response = '{"User Safety": "unsafe", "Violated Categories": ""}'
         is_safe, *violated_policies = nemoguard_parse_prompt_safety(response)
         assert is_safe is False
         assert violated_policies == [""]
@@ -187,15 +194,15 @@ def test_missing_user_safety_field(self):
         assert violated_policies == ["JSON parsing failed"]
 
     def test_single_category(self):
-        """Test parsing with single safety category."""
-        response = '{"User Safety": "unsafe", "Safety Categories": "Violence"}'
+        """Test parsing with single violated category."""
+        response = '{"User Safety": "unsafe", "Violated Categories": "Violence"}'
         is_safe, *violated_policies = nemoguard_parse_prompt_safety(response)
         assert is_safe is False
         assert violated_policies == ["Violence"]
 
     def test_complex_category_names(self):
         """Test parsing with descriptive category names."""
-        response = '{"User Safety": "unsafe", "Safety Categories": "Violence, Hate Speech, Sexual Content"}'
+        response = '{"User Safety": "unsafe", "Violated Categories": "Violence, Hate Speech, Sexual Content"}'
         is_safe, *violated_policies = nemoguard_parse_prompt_safety(response)
         assert is_safe is False
         assert "Violence" in violated_policies
@@ -220,15 +227,22 @@ def test_unsafe_response_content_json(self):
         assert is_safe is False
         assert violated_policies == []
 
-    def test_unsafe_with_safety_categories(self):
-        """Test parsing unsafe response with safety categories."""
-        response = '{"Response Safety": "unsafe", "Safety Categories": "S1, S8, S10"}'
+    def test_unsafe_with_violated_categories(self):
+        """Test parsing unsafe response with violated categories (correct NemoGuard key)."""
+        response = '{"Response Safety": "unsafe", "Violated Categories": "S1, S8, S10"}'
         is_safe, *violated_policies = nemoguard_parse_response_safety(response)
         assert is_safe is False
         assert "S1" in violated_policies
         assert "S8" in violated_policies
         assert "S10" in violated_policies
 
+    def test_wrong_key_safety_categories_yields_no_categories(self):
+        """Regression: old wrong key 'Safety Categories' should not extract categories."""
+        response = '{"Response Safety": "unsafe", "Safety Categories": "S1, S8"}'
+        is_safe, *violated_policies = nemoguard_parse_response_safety(response)
+        assert is_safe is False
+        assert violated_policies == []
+
     def test_case_insensitive_safety_status(self):
         """Test parsing is case insensitive for safety status."""
         response = '{"Response Safety": "SAFE"}'
@@ -238,7 +252,7 @@ def test_case_insensitive_safety_status(self):
 
     def test_categories_with_whitespace_trimming(self):
         """Test parsing categories with extra whitespace gets trimmed."""
-        response = '{"Response Safety": "unsafe", "Safety Categories": " S1 , S8 , S10 "}'
+        response = '{"Response Safety": "unsafe", "Violated Categories": " S1 , S8 , S10 "}'
         is_safe, *violated_policies = nemoguard_parse_response_safety(response)
         assert is_safe is False
         assert "S1" in violated_policies
@@ -268,7 +282,7 @@ def test_missing_response_safety_field(self):
 
     def test_full_nemoguard_response(self):
         """Test parsing a full NemoGuard response with both user and response safety."""
-        response = '{"User Safety": "safe", "Response Safety": "unsafe", "Safety Categories": "S1, S8"}'
+        response = '{"User Safety": "safe", "Response Safety": "unsafe", "Violated Categories": "S1, S8"}'
         is_safe, *violated_policies = nemoguard_parse_response_safety(response)
         assert is_safe is False
         assert "S1" in violated_policies
@@ -298,13 +312,13 @@ def test_llama_guard_typical_responses(self):
 
     def test_nemoguard_content_safety_responses(self):
         """Test typical NemoGuard ContentSafety model responses."""
-        response = '{"User Safety": "unsafe", "Safety Categories": "S1: Violence, S8: Hate/Identity Hate"}'
+        response = '{"User Safety": "unsafe", "Violated Categories": "S1: Violence, S8: Hate/Identity Hate"}'
         is_safe, *violated_policies = nemoguard_parse_prompt_safety(response)
         assert is_safe is False
         assert "S1: Violence" in violated_policies
         assert "S8: Hate/Identity Hate" in violated_policies
 
-        response = '{"User Safety": "safe", "Response Safety": "unsafe", "Safety Categories": "S11: Sexual Content"}'
+        response = '{"User Safety": "safe", "Response Safety": "unsafe", "Violated Categories": "S11: Sexual Content"}'
         is_safe, *violated_policies = nemoguard_parse_response_safety(response)
         assert is_safe is False
         assert violated_policies == ["S11: Sexual Content"]
@@ -359,7 +373,7 @@ def test_starred_unpacking_compatibility(self):
         assert is_safe is True
         assert violated_policies == []
 
-        response = '{"Response Safety": "unsafe", "Safety Categories": "S1, S8"}'
+        response = '{"Response Safety": "unsafe", "Violated Categories": "S1, S8"}'
         result = nemoguard_parse_response_safety(response)
         is_safe, *violated_policies = result
         assert is_safe is False