Skip to content

Commit 8fa8fa7

Browse files
vertex-sdk-bot and copybara-github
authored and committed
feat: GenAI Client(evals) - add user-facing generate_loss_clusters with LRO polling and replay tests
PiperOrigin-RevId: 893874547
1 parent 09794ba commit 8fa8fa7

File tree

5 files changed

+387
-1
lines changed

5 files changed

+387
-1
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai import types
19+
import pytest
20+
21+
22+
def test_gen_loss_clusters(client):
    """Verifies generate_loss_clusters() yields a populated GenerateLossClustersResponse."""
    analysis_config = types.LossAnalysisConfig(
        metric="multi_turn_task_success_v1",
        candidate="travel-agent",
    )
    response = client.evals.generate_loss_clusters(
        eval_result=types.EvaluationResult(),
        config=analysis_config,
    )
    assert isinstance(response, types.GenerateLossClustersResponse)
    assert len(response.results) == 1

    only_result = response.results[0]
    assert only_result.config.metric == "multi_turn_task_success_v1"
    assert only_result.config.candidate == "travel-agent"
    assert len(only_result.clusters) == 2

    first, second = only_result.clusters[0], only_result.clusters[1]
    assert first.cluster_id == "cluster-1"
    assert first.taxonomy_entry.l1_category == "Tool Calling"
    assert first.taxonomy_entry.l2_category == "Missing Tool Invocation"
    assert first.item_count == 3
    assert second.cluster_id == "cluster-2"
    assert second.taxonomy_entry.l1_category == "Hallucination"
    assert second.item_count == 2
47+
48+
49+
# Enables the pytest-asyncio plugin so the @pytest.mark.asyncio test in this
# module is collected and executed with an event loop.
pytest_plugins = ("pytest_asyncio",)
50+
51+
52+
@pytest.mark.asyncio
async def test_gen_loss_clusters_async(client):
    """Verifies the async generate_loss_clusters() yields a GenerateLossClustersResponse."""
    analysis_config = types.LossAnalysisConfig(
        metric="multi_turn_task_success_v1",
        candidate="travel-agent",
    )
    response = await client.aio.evals.generate_loss_clusters(
        eval_result=types.EvaluationResult(),
        config=analysis_config,
    )
    assert isinstance(response, types.GenerateLossClustersResponse)
    assert len(response.results) == 1

    sole_result = response.results[0]
    assert sole_result.config.metric == "multi_turn_task_success_v1"
    assert len(sole_result.clusters) == 2
    cluster_ids = [cluster.cluster_id for cluster in sole_result.clusters]
    assert cluster_ids == ["cluster-1", "cluster-2"]
70+
71+
72+
# Wires this module into the replay test harness: the harness records/replays
# API interactions for the named test_method so these tests run hermetically.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.generate_loss_clusters",
)

tests/unit/vertexai/genai/test_evals.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from google.cloud.aiplatform import initializer as aiplatform_initializer
3030
from vertexai import _genai
3131
from vertexai._genai import _evals_data_converters
32+
from vertexai._genai import _evals_utils
3233
from vertexai._genai import _evals_metric_handlers
3334
from vertexai._genai import _evals_visualization
3435
from vertexai._genai import _evals_metric_loaders
@@ -265,6 +266,100 @@ def test_t_inline_results(self):
265266
assert payload[0]["candidate_results"][0]["score"] == 0.0
266267

267268

269+
class TestLossAnalysis:
    """Unit tests for loss analysis types and visualization."""

    def test_response_structure(self):
        """Round-trips a fully-populated response through the type wrappers."""
        tool_cluster = common_types.LossCluster(
            cluster_id="cluster-1",
            taxonomy_entry=common_types.LossTaxonomyEntry(
                l1_category="Tool Calling",
                l2_category="Missing Tool Invocation",
                description="The agent failed to invoke a required tool.",
            ),
            item_count=3,
        )
        hallucination_cluster = common_types.LossCluster(
            cluster_id="cluster-2",
            taxonomy_entry=common_types.LossTaxonomyEntry(
                l1_category="Hallucination",
                l2_category="Hallucination of Action",
                description="Verbally confirmed action without tool.",
            ),
            item_count=2,
        )
        response = common_types.GenerateLossClustersResponse(
            analysis_time="2026-04-01T10:00:00Z",
            results=[
                common_types.LossAnalysisResult(
                    config=common_types.LossAnalysisConfig(
                        metric="multi_turn_task_success_v1",
                        candidate="travel-agent",
                    ),
                    analysis_time="2026-04-01T10:00:00Z",
                    clusters=[tool_cluster, hallucination_cluster],
                )
            ],
        )
        assert response.analysis_time == "2026-04-01T10:00:00Z"
        assert len(response.results) == 1
        parsed = response.results[0]
        assert parsed.config.metric == "multi_turn_task_success_v1"
        assert len(parsed.clusters) == 2
        assert parsed.clusters[0].cluster_id == "cluster-1"
        assert parsed.clusters[0].item_count == 3
        assert parsed.clusters[1].cluster_id == "cluster-2"

    def test_response_show_with_results(self, capsys):
        """show() on a response prints the metric and cluster identifiers."""
        lone_cluster = common_types.LossCluster(
            cluster_id="c1",
            taxonomy_entry=common_types.LossTaxonomyEntry(
                l1_category="Cat1",
                l2_category="SubCat1",
            ),
            item_count=5,
        )
        response = common_types.GenerateLossClustersResponse(
            results=[
                common_types.LossAnalysisResult(
                    config=common_types.LossAnalysisConfig(
                        metric="test_metric",
                        candidate="test-candidate",
                    ),
                    clusters=[lone_cluster],
                )
            ],
        )
        response.show()
        printed = capsys.readouterr().out
        assert "test_metric" in printed
        assert "c1" in printed

    def test_loss_analysis_result_show(self, capsys):
        """show() on a single result also prints the metric and cluster id."""
        direct_result = common_types.LossAnalysisResult(
            config=common_types.LossAnalysisConfig(
                metric="test_metric",
                candidate="test-candidate",
            ),
            clusters=[
                common_types.LossCluster(
                    cluster_id="c1",
                    taxonomy_entry=common_types.LossTaxonomyEntry(
                        l1_category="DirectCat",
                        l2_category="DirectSubCat",
                    ),
                    item_count=7,
                )
            ],
        )
        direct_result.show()
        printed = capsys.readouterr().out
        assert "test_metric" in printed
        assert "c1" in printed
361+
362+
268363
class TestEvals:
269364
"""Unit tests for the GenAI client."""
270365

vertexai/_genai/_evals_utils.py

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
"""Utility functions for evals."""
1616

1717
import abc
18+
import asyncio
19+
import json
1820
import logging
1921
import os
20-
import json
22+
import time
2123
from typing import Any, Optional, Union
2224

2325
from google.genai._api_client import BaseApiClient
@@ -366,6 +368,112 @@ def _postprocess_user_scenarios_response(
366368
)
367369

368370

371+
def _display_loss_analysis_result(
372+
result: types.LossAnalysisResult,
373+
) -> None:
374+
"""Displays a LossAnalysisResult as a formatted pandas DataFrame."""
375+
metric = result.config.metric if result.config else None
376+
candidate = result.config.candidate if result.config else None
377+
rows = []
378+
for cluster in result.clusters or []:
379+
entry = cluster.taxonomy_entry
380+
row = {
381+
"metric": metric,
382+
"candidate": candidate,
383+
"cluster_id": cluster.cluster_id,
384+
"l1_category": entry.l1_category if entry else None,
385+
"l2_category": entry.l2_category if entry else None,
386+
"description": entry.description if entry else None,
387+
"item_count": cluster.item_count,
388+
}
389+
rows.append(row)
390+
391+
if not rows:
392+
print("No loss clusters found.") # pylint: disable=print-function
393+
return
394+
395+
df = pd.DataFrame(rows)
396+
try:
397+
from IPython.display import display # pylint: disable=g-import-not-at-top
398+
399+
display(df)
400+
except ImportError:
401+
print(df.to_string()) # pylint: disable=print-function
402+
403+
404+
405+
406+
407+
def _poll_operation(
408+
api_client: BaseApiClient,
409+
operation: types.GenerateLossClustersOperation,
410+
poll_interval_seconds: float = 5.0,
411+
) -> types.GenerateLossClustersOperation:
412+
"""Polls a long-running operation until completion.
413+
414+
Args:
415+
api_client: The API client to use for polling.
416+
operation: The initial operation returned from the API call.
417+
poll_interval_seconds: Time between polls.
418+
419+
Returns:
420+
The completed operation.
421+
"""
422+
if operation.done:
423+
return operation
424+
start_time = time.time()
425+
while True:
426+
response = api_client.request("get", operation.name, {}, None)
427+
response_dict = {} if not response.body else json.loads(response.body)
428+
polled = types.GenerateLossClustersOperation._from_response(
429+
response=response_dict, kwargs={}
430+
)
431+
if polled.done:
432+
return polled
433+
elapsed = int(time.time() - start_time)
434+
logger.info(
435+
"Loss analysis operation still running... Elapsed time: %d seconds",
436+
elapsed,
437+
)
438+
time.sleep(poll_interval_seconds)
439+
440+
441+
async def _poll_operation_async(
442+
api_client: BaseApiClient,
443+
operation: types.GenerateLossClustersOperation,
444+
poll_interval_seconds: float = 5.0,
445+
) -> types.GenerateLossClustersOperation:
446+
"""Polls a long-running operation until completion (async).
447+
448+
Args:
449+
api_client: The API client to use for polling.
450+
operation: The initial operation returned from the API call.
451+
poll_interval_seconds: Time between polls.
452+
453+
Returns:
454+
The completed operation.
455+
"""
456+
if operation.done:
457+
return operation
458+
start_time = time.time()
459+
while True:
460+
response = await api_client.async_request(
461+
"get", operation.name, {}, None
462+
)
463+
response_dict = {} if not response.body else json.loads(response.body)
464+
polled = types.GenerateLossClustersOperation._from_response(
465+
response=response_dict, kwargs={}
466+
)
467+
if polled.done:
468+
return polled
469+
elapsed = int(time.time() - start_time)
470+
logger.info(
471+
"Loss analysis operation still running... Elapsed time: %d seconds",
472+
elapsed,
473+
)
474+
await asyncio.sleep(poll_interval_seconds)
475+
476+
369477
def _validate_dataset_agent_data(
370478
dataset: types.EvaluationDataset,
371479
inference_configs: Optional[dict[str, Any]] = None,

0 commit comments

Comments
 (0)