Skip to content

Commit 6a69095

Browse files
slister1001 and Copilot authored
[Evaluation] Fix UTF-8 encoding for red team JSONL files on Windows (#45500)
* Fix UTF-8 encoding for red team JSONL files on Windows

  Add explicit encoding='utf-8' to all file open() calls in the PyRIT result processing path. Without this, Windows defaults to the system locale encoding (charmap/cp1252), causing UnicodeDecodeError when reading JSONL files containing non-ASCII characters from UnicodeConfusable strategy or CJK languages.

  Fixes: Tests 1.7 (UnicodeConfusable), 1.16 (Japanese/Chinese)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Add encoding regression tests for non-ASCII JSONL round-trip

  Test CJK characters, Unicode confusables, and mixed scripts to prevent future regressions of the charmap encoding bug on Windows.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Format with black

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address review comments: test production code paths, consolidate CHANGELOG

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Apply black formatting

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 0d944aa commit 6a69095

4 files changed

Lines changed: 115 additions & 4 deletions

File tree

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
## 1.16.0 (Unreleased)
44

55
### Bugs Fixed
6+
- Fixed `UnicodeDecodeError` on Windows when reading red team JSONL files containing non-ASCII characters (UnicodeConfusable strategy, CJK languages) by adding explicit `encoding="utf-8"` to all file open calls in the result processing path.
67
- Fixed `NotFoundError: 404` when using `model_config` dict target with Foundry-style endpoints (`*.services.ai.azure.com`) by appending `/openai/v1` to the endpoint URL for PyRIT compatibility.
78
- Fixed red team scan status stuck at `in_progress` in results.json despite the scan completing, by treating leftover `pending` entries as `failed`.
89
- Fixed `ungrounded_attributes` risk category being silently skipped due to a cache key mismatch (`isa` vs `ungrounded_attributes`) in the Foundry execution path.

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_result_processor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -199,7 +199,7 @@ def to_red_team_result(
199199
# Process data file to extract conversations
200200
if data_file and os.path.exists(data_file):
201201
try:
202-
with open(data_file, "r") as f:
202+
with open(data_file, "r", encoding="utf-8") as f:
203203
for line in f:
204204
try:
205205
conv_data = json.loads(line)

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_utils/formatting_utils.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@ def write_pyrit_outputs_to_file(
303303
if os.path.exists(output_path):
304304
existing_line_count = 0
305305
try:
306-
with open(output_path, "r") as existing_file:
306+
with open(output_path, "r", encoding="utf-8") as existing_file:
307307
existing_line_count = sum(1 for _ in existing_file)
308308

309309
if len(conversations) > existing_line_count:
@@ -335,7 +335,7 @@ def write_pyrit_outputs_to_file(
335335
if risk_sub_type:
336336
conv_dict["risk_sub_type"] = risk_sub_type
337337
json_lines += json.dumps(conv_dict) + "\n"
338-
with Path(output_path).open("w") as f:
338+
with Path(output_path).open("w", encoding="utf-8") as f:
339339
f.writelines(json_lines)
340340
logger.debug(
341341
f"Successfully wrote {len(conversations)-existing_line_count} new conversation(s) to {output_path}"
@@ -375,7 +375,7 @@ def write_pyrit_outputs_to_file(
375375
if risk_sub_type:
376376
conv_dict["risk_sub_type"] = risk_sub_type
377377
json_lines += json.dumps(conv_dict) + "\n"
378-
with Path(output_path).open("w") as f:
378+
with Path(output_path).open("w", encoding="utf-8") as f:
379379
f.writelines(json_lines)
380380
logger.debug(f"Successfully wrote {len(conversations)} conversations to {output_path}")
381381
return str(output_path)

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_formatting_utils.py

Lines changed: 110 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import pytest
66
import math
77
import json
8+
import logging
89
from unittest.mock import patch, MagicMock, mock_open
910
from azure.ai.evaluation.red_team._utils.formatting_utils import (
1011
message_to_dict,
@@ -14,6 +15,7 @@
1415
format_scorecard,
1516
is_none_or_nan,
1617
list_mean_nan_safe,
18+
write_pyrit_outputs_to_file,
1719
)
1820
from azure.ai.evaluation.red_team._attack_strategy import AttackStrategy
1921
from pyrit.models import ChatMessage
@@ -229,3 +231,111 @@ def test_list_mean_nan_safe_empty_after_filtering(self):
229231
"""Test list_mean_nan_safe with a list that is empty after filtering."""
230232
result = list_mean_nan_safe([None, float("nan")])
231233
assert result == 0.0 # Default when no valid values
234+
235+
236+
def _make_mock_pieces(conversation_id, messages):
237+
"""Create mock prompt request pieces for a conversation.
238+
239+
:param conversation_id: The conversation ID to assign to all pieces
240+
:param messages: List of (role, content) tuples
241+
:return: List of mock PromptRequestPiece objects
242+
"""
243+
pieces = []
244+
for role, content in messages:
245+
piece = MagicMock()
246+
piece.conversation_id = conversation_id
247+
piece.original_value = content
248+
piece.labels = {
249+
"context": "",
250+
"tool_calls": [],
251+
"risk_sub_type": None,
252+
"token_usage": None,
253+
}
254+
chat_msg = MagicMock(spec=ChatMessage)
255+
chat_msg.role = role
256+
chat_msg.content = content
257+
piece.to_chat_message.return_value = chat_msg
258+
pieces.append(piece)
259+
return pieces
260+
261+
262+
@pytest.mark.unittest
263+
class TestUnicodeJSONLRoundTrip:
264+
"""Test that JSONL files with non-ASCII content survive write/read round-trips.
265+
266+
Regression tests for the encoding bug where open() without encoding='utf-8'
267+
caused UnicodeDecodeError on Windows for UnicodeConfusable and CJK content.
268+
These tests exercise the production write_pyrit_outputs_to_file code path.
269+
"""
270+
271+
def test_jsonl_roundtrip_cjk_characters(self, tmp_path):
272+
"""Test JSONL round-trip with CJK characters (Japanese, Chinese)."""
273+
output_path = str(tmp_path / "cjk_test.jsonl")
274+
pieces = _make_mock_pieces(
275+
"conv-cjk",
276+
[("user", "これはテストです"), ("assistant", "这是一个测试")],
277+
)
278+
mock_memory = MagicMock()
279+
mock_memory.get_prompt_request_pieces.return_value = pieces
280+
281+
with patch("azure.ai.evaluation.red_team._utils.formatting_utils.CentralMemory") as mock_cm:
282+
mock_cm.get_memory_instance.return_value = mock_memory
283+
write_pyrit_outputs_to_file(
284+
output_path=output_path,
285+
logger=logging.getLogger("test"),
286+
prompt_to_context={},
287+
)
288+
289+
with open(output_path, "r", encoding="utf-8") as f:
290+
data = json.loads(f.readline())
291+
assert data["conversation"]["messages"][0]["content"] == "これはテストです"
292+
assert data["conversation"]["messages"][1]["content"] == "这是一个测试"
293+
294+
def test_jsonl_roundtrip_unicode_confusable(self, tmp_path):
295+
"""Test JSONL round-trip with Unicode confusable characters."""
296+
output_path = str(tmp_path / "confusable_test.jsonl")
297+
confusable_text = "Ⓗⓔⓛⓛⓞ ⓦⓞⓡⓛⓓ"
298+
pieces = _make_mock_pieces(
299+
"conv-confusable",
300+
[("user", confusable_text), ("assistant", "I understand your request.")],
301+
)
302+
mock_memory = MagicMock()
303+
mock_memory.get_prompt_request_pieces.return_value = pieces
304+
305+
with patch("azure.ai.evaluation.red_team._utils.formatting_utils.CentralMemory") as mock_cm:
306+
mock_cm.get_memory_instance.return_value = mock_memory
307+
write_pyrit_outputs_to_file(
308+
output_path=output_path,
309+
logger=logging.getLogger("test"),
310+
prompt_to_context={},
311+
)
312+
313+
with open(output_path, "r", encoding="utf-8") as f:
314+
data = json.loads(f.readline())
315+
assert data["conversation"]["messages"][0]["content"] == confusable_text
316+
317+
def test_jsonl_roundtrip_mixed_scripts(self, tmp_path):
318+
"""Test JSONL round-trip with mixed scripts (Arabic, Cyrillic, emoji)."""
319+
output_path = str(tmp_path / "mixed_test.jsonl")
320+
pieces = _make_mock_pieces(
321+
"conv-mixed",
322+
[
323+
("user", "مرحبا Привет 🔥 café"),
324+
("assistant", "Multi-script response: αβγ"),
325+
],
326+
)
327+
mock_memory = MagicMock()
328+
mock_memory.get_prompt_request_pieces.return_value = pieces
329+
330+
with patch("azure.ai.evaluation.red_team._utils.formatting_utils.CentralMemory") as mock_cm:
331+
mock_cm.get_memory_instance.return_value = mock_memory
332+
write_pyrit_outputs_to_file(
333+
output_path=output_path,
334+
logger=logging.getLogger("test"),
335+
prompt_to_context={},
336+
)
337+
338+
with open(output_path, "r", encoding="utf-8") as f:
339+
data = json.loads(f.readline())
340+
assert "مرحبا" in data["conversation"]["messages"][0]["content"]
341+
assert "αβγ" in data["conversation"]["messages"][1]["content"]

0 commit comments

Comments
 (0)