Skip to content

Commit 8a4c208

Browse files
slister1001 and Copilot authored
fix: redact sensitive data from evaluation SDK log messages (#45176)
* fix: redact sensitive data from log messages to resolve CredScan alert

  Remove user-provided content (queries, responses, tool definitions, exception messages) from log strings that flow into Geneva telemetry. This prevents database connection strings and other credentials embedded in user payloads from being flagged by CredScan.

  Changes:
  - Remove f-string interpolation of query/response/tool_definitions in warning and debug log messages
  - Downgrade noisy agent-response warnings to debug level
  - Sanitize upload error messages to emit only exception type name
  - Chain original exception with 'from e'

  Resolves ICM 738457593

* fix: call PROXY_URL() as function instead of using as value

  PROXY_URL in devtools_testutils.config is now a function, not a constant.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 603be58 commit 8a4c208

2 files changed

Lines changed: 9 additions & 11 deletions

File tree

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ def reformat_conversation_history(query, logger=None, include_system_messages=Fa
706706
# Lower percentage of mode in Likert scale (73.4% vs 75.4%)
707707
# Lower pairwise agreement between LLMs (85% vs 90% at the pass/fail level with threshold of 3)
708708
if logger:
709-
logger.warning(f"Conversation history could not be parsed, falling back to original query: {query}")
709+
logger.warning("Conversation history could not be parsed, falling back to original query")
710710
return query
711711

712712

@@ -761,15 +761,15 @@ def reformat_agent_response(response, logger=None, include_tool_messages=False):
761761
# If no message could be extracted, likely the format changed, fallback to the original response in that case
762762
if logger:
763763
logger.debug(
764-
f"Empty agent response extracted, likely due to input schema change. Falling back to using the original response: {response}"
764+
"Empty agent response extracted, likely due to input schema change. Falling back to original response"
765765
)
766766
return response
767767
return "\n".join(agent_response)
768-
except:
768+
except Exception:
769769
# If the agent response cannot be parsed for whatever reason (e.g. the converter format changed), the original response is returned
770770
# This is a fallback to ensure that the evaluation can still proceed. See comments on reformat_conversation_history for more details.
771771
if logger:
772-
logger.debug(f"Agent response could not be parsed, falling back to original response: {response}")
772+
logger.debug("Agent response could not be parsed, falling back to original response")
773773
return response
774774

775775

@@ -787,9 +787,7 @@ def reformat_tool_definitions(tool_definitions, logger=None):
787787
# If the tool definitions cannot be parsed for whatever reason, the original tool definitions are returned
788788
# This is a fallback to ensure that the evaluation can still proceed. See comments on reformat_conversation_history for more details.
789789
if logger:
790-
logger.warning(
791-
f"Tool definitions could not be parsed, falling back to original definitions: {tool_definitions}"
792-
)
790+
logger.debug("Tool definitions could not be parsed, falling back to original definitions")
793791
return tool_definitions
794792

795793

@@ -915,9 +913,9 @@ def upload(path: str, container_client: ContainerClient, logger=None):
915913

916914
except Exception as e:
917915
raise EvaluationException(
918-
message=f"Error uploading file: {e}",
919-
internal_message=f"Error uploading file: {e}",
916+
message=f"Error uploading file: {type(e).__name__}",
917+
internal_message=f"Error uploading file: {type(e).__name__}",
920918
target=ErrorTarget.RAI_CLIENT,
921919
category=ErrorCategory.UPLOAD_ERROR,
922920
blame=ErrorBlame.SYSTEM_ERROR,
923-
)
921+
) from e

sdk/evaluation/azure-ai-evaluation/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ def simple_conversation():
317317
def redirect_openai_requests():
318318
"""Route requests from the openai package to the test proxy."""
319319
config = TestProxyConfig(
320-
recording_id=get_recording_id(), recording_mode="record" if is_live() else "playback", proxy_url=PROXY_URL
320+
recording_id=get_recording_id(), recording_mode="record" if is_live() else "playback", proxy_url=PROXY_URL()
321321
)
322322

323323
with TestProxyHttpxClientBase.record_with_proxy(config):

0 commit comments

Comments
 (0)