From 5e128e1dcb852e91a4dd23b2efb2b4695eb5eaf7 Mon Sep 17 00:00:00 2001
From: Waqas Javed <7674577+w-javed@users.noreply.github.com>
Date: Tue, 19 May 2026 12:25:00 -0700
Subject: [PATCH 1/3] Bump azure-ai-evaluation version to 1.16.8

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../azure-ai-evaluation/azure/ai/evaluation/_version.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
index 7f7138b3715f..f8c5d12c048c 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
@@ -3,4 +3,4 @@
 # ---------------------------------------------------------
 # represents upcoming version
 
-VERSION = "1.16.7"
+VERSION = "1.16.8"

From a8f8bcf64a4744f800c820ee63e6e296ab99d5bf Mon Sep 17 00:00:00 2001
From: Waqas Javed <7674577+w-javed@users.noreply.github.com>
Date: Wed, 20 May 2026 13:40:07 -0700
Subject: [PATCH 2/3] Add 1.16.8 and 1.16.9 (Unreleased) changelog sections for
 azure-ai-evaluation

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../azure-ai-evaluation/CHANGELOG.md          | 24 +++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index b3416a30ec64..ac7aa24e22ec 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -1,6 +1,22 @@
 # Release History
 
-## 1.16.7 (Unreleased)
+## 1.16.9 (Unreleased)
+
+### Breaking Changes
+
+- Updated `EVALUATOR_NAME_METRICS_MAPPINGS` so `document_retrieval` and `rouge_score` each report a single primary metric (`document_retrieval` and `rouge`, respectively), with previous sub-metrics now represented in each evaluator's `*_properties` payload.
+
+### Bugs Fixed
+
+- Fixed `format_llm_response` raising `UnboundLocalError` when `inputs` was not provided; `sample_input` is now always initialized.
+
+## 1.16.8 (2026-05-19)
+
+### Features Added
+
+- App Insights logging now forwards arbitrary evaluator-specific keys from each event's `properties` payload as a single `gen_ai.evaluation.properties` JSON attribute (carried inside `internal_properties`). Previously only the four red-team keys (`attack_success`, `attack_technique`, `attack_complexity`, `attack_success_threshold`) were forwarded; structured outputs such as rubric `dimension_scores` were silently dropped. Payloads larger than 7500 characters are replaced with a valid JSON marker (`{"truncated": true, "original_size_bytes": <n>}`) so consumers can always `json.loads` the value. Non-dict `properties` payloads are now safely ignored instead of raising in the red-team forwarder.
+
+## 1.16.7 (2026-05-07)
 
 ### Features Added
 
@@ -9,11 +25,6 @@
 - Added `status` field (`"completed"`, `"error"`, `"skipped"`) on evaluation result items to indicate evaluator execution outcome.
 - Added `skipped` and `errored` counts to `result_counts` and `per_testing_criteria_results` in AOAI evaluation summaries.
 - Added `skipped` to `ResultCount` and `skipped`/`errored` to `PerTestingCriteriaResult` typed contracts.
-- App Insights logging now forwards arbitrary evaluator-specific keys from each event's `properties` payload as a single `gen_ai.evaluation.properties` JSON attribute (carried inside `internal_properties`). Previously only the four red-team keys (`attack_success`, `attack_technique`, `attack_complexity`, `attack_success_threshold`) were forwarded; structured outputs such as rubric `dimension_scores` were silently dropped. Payloads larger than 7500 characters are replaced with a valid JSON marker (`{"truncated": true, "original_size_bytes": <n>}`) so consumers can always `json.loads` the value. Non-dict `properties` payloads are now safely ignored instead of raising in the red-team forwarder.
-
-### Breaking Changes
-
-- Updated `EVALUATOR_NAME_METRICS_MAPPINGS` so `document_retrieval` and `rouge_score` report single primary metrics (`document_retrieval`, `rouge`), with previous sub-metrics now represented in each evaluator's `*_properties` payload.
 
 ### Bugs Fixed
 
@@ -27,7 +38,6 @@
 - Fixed `_get_metric_result` prefix matching where shorter metric names (e.g., `xpia`) could match before longer, more-specific ones (e.g., `xpia_manipulated_content`). Now sorts by length descending for correct longest-prefix matching.
 - Fixed non-dict `_properties` values from evaluators causing downstream issues. Values that are not dicts are now logged and dropped gracefully.
 - Fixed filename length error in `_inline_image` by catching OSError/ValueError during local path resolution and fall back to returning a text chunk instead of throwing.
-- Fixed `format_llm_response` raising `UnboundLocalError` when `inputs` was not provided by ensuring `sample_input` is always initialized.
 
 ### Other Changes
 

From ac6f4d0cef7cde85f8cfc8fcea74df9e06c8e338 Mon Sep 17 00:00:00 2001
From: Waqas Javed <7674577+w-javed@users.noreply.github.com>
Date: Wed, 20 May 2026 15:33:19 -0700
Subject: [PATCH 3/3] Bump azure-ai-evaluation version to 1.16.9

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../azure-ai-evaluation/azure/ai/evaluation/_version.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
index f8c5d12c048c..666636331a57 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
@@ -3,4 +3,4 @@
 # ---------------------------------------------------------
 # represents upcoming version
 
-VERSION = "1.16.8"
+VERSION = "1.16.9"