Skip to content

Commit 657f379

Browse files
vertex-sdk-bot and copybara-github
authored and committed
feat: GenAI Client(evals) - Add red_teaming_config support for create_evaluation_run
PiperOrigin-RevId: 914075746
1 parent 73a515a commit 657f379

6 files changed

Lines changed: 352 additions & 0 deletions

File tree

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,7 @@ def test_create_eval_run_with_metric_resource_name(mock_uuid4, client):
614614
# assert eval_item.evaluation_request.candidate_responses == []
615615
# assert evaluation_run.error is None
616616

617+
617618
# def test_create_eval_run_data_source_evaluation_dataset_with_agent_info_and_prompt_template_data(
618619
# client,
619620
# ):
@@ -708,6 +709,35 @@ def test_create_eval_run_with_metric_resource_name(mock_uuid4, client):
708709
# == INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["response"]
709710
# )
710711
# assert evaluation_run.error is None
712+
def test_create_eval_run_with_red_teaming_config(client):
    """Checks create_evaluation_run() round-trips a red_teaming_config.

    The run is created with no metrics and a single red teaming config. The
    returned EvaluationRun must be PENDING, carry exactly one analysis config
    echoing the attack category and vulnerable tool, and report no error.
    """
    run_name = "test_red_teaming"
    attack_category = "FINANCIAL_OR_CREDENTIAL_PHISHING"
    flights_tool = types.VulnerableTool(
        tool_name="search_flights",
        json_paths=["$.flights[0].description"],
    )
    red_teaming = types.RedTeamingAnalysisConfig(
        attack_categories=[attack_category],
        vulnerable_tools=[flights_tool],
    )

    run = client.evals.create_evaluation_run(
        name=run_name,
        display_name=run_name,
        dataset=types.EvaluationRunDataSource(evaluation_set=EVAL_SET_NAME),
        dest=GCS_DEST,
        metrics=[],
        red_teaming_config=red_teaming,
    )

    assert isinstance(run, types.EvaluationRun)
    assert run.display_name == run_name
    assert run.state == types.EvaluationRunState.PENDING

    analysis_configs = run.analysis_configs
    assert analysis_configs is not None
    assert len(analysis_configs) == 1
    echoed = analysis_configs[0].red_teaming_analysis_config
    assert echoed.attack_categories == [attack_category]
    assert echoed.vulnerable_tools[0].tool_name == "search_flights"
    assert run.error is None
739+
740+
711741
pytest_plugins = ("pytest_asyncio",)
712742

713743

tests/unit/vertexai/genai/test_evals.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,6 +1835,89 @@ def test_loss_analysis_metrics_accepts_metric_object(self):
18351835
assert result[0].candidate == "agent-1"
18361836

18371837

1838+
class TestRedTeamingTypes:
    """Construction checks for the red teaming request and result types."""

    def test_red_teaming_analysis_config_construction(self):
        """A populated config keeps its attack categories and tools."""
        tool = common_types.VulnerableTool(
            tool_name="search_flights",
            json_paths=["$.flights[0].description"],
        )
        config = common_types.RedTeamingAnalysisConfig(
            attack_categories=["FINANCIAL_OR_CREDENTIAL_PHISHING"],
            vulnerable_tools=[tool],
        )
        assert len(config.attack_categories) == 1
        first_tool = config.vulnerable_tools[0]
        assert first_tool.tool_name == "search_flights"

    def test_red_teaming_analysis_config_optional_fields(self):
        """Both fields are None when the config is built with no arguments."""
        empty_config = common_types.RedTeamingAnalysisConfig()
        assert empty_config.attack_categories is None
        assert empty_config.vulnerable_tools is None

    def test_evaluation_run_results_has_red_teaming_results(self):
        """EvaluationRunResults accepts and exposes red teaming results."""
        category_result = common_types.AttackCategoryResult(
            attack_category="FINANCIAL_OR_CREDENTIAL_PHISHING",
            attack_success_rate=0.9,
        )
        analysis_result = common_types.RedTeamingAnalysisResult(
            category_results=[category_result],
        )
        results = common_types.EvaluationRunResults(
            red_teaming_analysis_results=[analysis_result],
        )
        assert len(results.red_teaming_analysis_results) == 1
        stored = results.red_teaming_analysis_results[0].category_results[0]
        assert stored.attack_success_rate == 0.9

    def test_create_params_accepts_analysis_configs(self):
        """The create-run parameters accept a list of analysis configs."""
        red_teaming = common_types.RedTeamingAnalysisConfig(
            attack_categories=["FINANCIAL_OR_CREDENTIAL_PHISHING"],
        )
        analysis_config = common_types.AnalysisConfig(
            red_teaming_analysis_config=red_teaming,
        )
        params = common_types._CreateEvaluationRunParameters(
            name="test-run",
            analysis_configs=[analysis_config],
        )
        assert len(params.analysis_configs) == 1
1892+
1893+
1894+
class TestResolveRedTeamingConfig:
    """Unit tests for _evals_utils._resolve_red_teaming_config."""

    def test_none_when_no_config(self):
        """Calling the resolver with no argument yields None."""
        assert _evals_utils._resolve_red_teaming_config() is None

    def test_wraps_config_in_analysis_configs(self):
        """A typed config comes back as a one-element AnalysisConfig list."""
        category = "FINANCIAL_OR_CREDENTIAL_PHISHING"
        red_teaming = common_types.RedTeamingAnalysisConfig(
            attack_categories=[category],
        )
        wrapped = _evals_utils._resolve_red_teaming_config(red_teaming)
        assert len(wrapped) == 1
        first = wrapped[0]
        assert isinstance(first, common_types.AnalysisConfig)
        assert first.red_teaming_analysis_config.attack_categories[0] == category

    def test_accepts_dict_input(self):
        """A plain dict is accepted and wrapped the same way."""
        raw_config = {"attack_categories": ["INJECTED_HOSTILITY_AND_HARASSMENT"]}
        wrapped = _evals_utils._resolve_red_teaming_config(raw_config)
        assert len(wrapped) == 1
        assert isinstance(wrapped[0], common_types.AnalysisConfig)
1919+
1920+
18381921
class TestResolveMetricName:
18391922
"""Unit tests for _resolve_metric_name."""
18401923

vertexai/_genai/_evals_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,20 @@ def _resolve_eval_run_loss_configs(
541541
return configs
542542

543543

544+
def _resolve_red_teaming_config(
    red_teaming_config: Optional[types.RedTeamingAnalysisConfigOrDict] = None,
) -> Optional[list[types.AnalysisConfig]]:
    """Wraps a RedTeamingAnalysisConfig into analysis_configs for the API.

    Args:
        red_teaming_config: The red teaming configuration, either as a
            `types.RedTeamingAnalysisConfig` instance or as a dict that is
            validated into one. `None` means red teaming was not requested.

    Returns:
        `None` when `red_teaming_config` is `None`; otherwise a one-element
        list holding a `types.AnalysisConfig` whose
        `red_teaming_analysis_config` is the resolved configuration.
    """
    # Compare against None explicitly: a truthiness check would drop an empty
    # dict (`{}`) while still wrapping the equivalent all-defaults model
    # instance, so the dict and object forms would disagree.
    if red_teaming_config is None:
        return None
    if isinstance(red_teaming_config, dict):
        config = types.RedTeamingAnalysisConfig.model_validate(red_teaming_config)
    else:
        config = red_teaming_config
    return [types.AnalysisConfig(red_teaming_analysis_config=config)]
556+
557+
544558
def _resolve_loss_analysis_config(
545559
eval_result: types.EvaluationResult,
546560
config: Optional[types.LossAnalysisConfig] = None,

vertexai/_genai/evals.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,13 @@ def _CreateEvaluationRunParameters_to_vertex(
130130
if getv(from_object, ["config"]) is not None:
131131
setv(to_object, ["config"], getv(from_object, ["config"]))
132132

133+
if getv(from_object, ["analysis_configs"]) is not None:
134+
setv(
135+
to_object,
136+
["analysisConfigs"],
137+
[item for item in getv(from_object, ["analysis_configs"])],
138+
)
139+
133140
return to_object
134141

135142

@@ -603,6 +610,13 @@ def _EvaluationRun_from_vertex(
603610
if getv(from_object, ["labels"]) is not None:
604611
setv(to_object, ["labels"], getv(from_object, ["labels"]))
605612

613+
if getv(from_object, ["analysisConfigs"]) is not None:
614+
setv(
615+
to_object,
616+
["analysis_configs"],
617+
[item for item in getv(from_object, ["analysisConfigs"])],
618+
)
619+
606620
return to_object
607621

608622

@@ -1159,6 +1173,7 @@ def _create_evaluation_run(
11591173
dict[str, types.EvaluationRunInferenceConfigOrDict]
11601174
] = None,
11611175
config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
1176+
analysis_configs: Optional[list[types.AnalysisConfigOrDict]] = None,
11621177
) -> types.EvaluationRun:
11631178
"""
11641179
Creates an EvaluationRun.
@@ -1172,6 +1187,7 @@ def _create_evaluation_run(
11721187
labels=labels,
11731188
inference_configs=inference_configs,
11741189
config=config,
1190+
analysis_configs=analysis_configs,
11751191
)
11761192

11771193
request_url_dict: Optional[dict[str, str]]
@@ -2616,6 +2632,7 @@ def create_evaluation_run(
26162632
labels: Optional[dict[str, str]] = None,
26172633
loss_analysis_metrics: Optional[list[Union[str, types.MetricOrDict]]] = None,
26182634
loss_analysis_configs: Optional[list[types.LossAnalysisConfigOrDict]] = None,
2635+
red_teaming_config: Optional[types.RedTeamingAnalysisConfigOrDict] = None,
26192636
config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
26202637
) -> types.EvaluationRun:
26212638
"""Creates an EvaluationRun.
@@ -2734,6 +2751,9 @@ def create_evaluation_run(
27342751
loss_analysis_configs=loss_analysis_configs,
27352752
inference_configs=inference_configs,
27362753
)
2754+
resolved_analysis_configs = _evals_utils._resolve_red_teaming_config(
2755+
red_teaming_config
2756+
)
27372757
evaluation_config = types.EvaluationRunConfig(
27382758
output_config=output_config,
27392759
metrics=resolved_metrics,
@@ -2751,6 +2771,7 @@ def create_evaluation_run(
27512771
data_source=resolved_dataset,
27522772
evaluation_config=evaluation_config,
27532773
inference_configs=resolved_inference_configs,
2774+
analysis_configs=resolved_analysis_configs,
27542775
labels=resolved_labels,
27552776
config=config,
27562777
)
@@ -3299,6 +3320,7 @@ async def _create_evaluation_run(
32993320
dict[str, types.EvaluationRunInferenceConfigOrDict]
33003321
] = None,
33013322
config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
3323+
analysis_configs: Optional[list[types.AnalysisConfigOrDict]] = None,
33023324
) -> types.EvaluationRun:
33033325
"""
33043326
Creates an EvaluationRun.
@@ -3312,6 +3334,7 @@ async def _create_evaluation_run(
33123334
labels=labels,
33133335
inference_configs=inference_configs,
33143336
config=config,
3337+
analysis_configs=analysis_configs,
33153338
)
33163339

33173340
request_url_dict: Optional[dict[str, str]]
@@ -4395,6 +4418,7 @@ async def create_evaluation_run(
43954418
inference_configs: Optional[
43964419
dict[str, types.EvaluationRunInferenceConfigOrDict]
43974420
] = None,
4421+
red_teaming_config: Optional[types.RedTeamingAnalysisConfigOrDict] = None,
43984422
labels: Optional[dict[str, str]] = None,
43994423
loss_analysis_metrics: Optional[list[Union[str, types.MetricOrDict]]] = None,
44004424
loss_analysis_configs: Optional[list[types.LossAnalysisConfigOrDict]] = None,
@@ -4426,6 +4450,11 @@ async def create_evaluation_run(
44264450
this will be automatically constructed using `agent_info` and `user_simulator_config`.
44274451
Example:
44284452
{"candidate-1": types.EvaluationRunInferenceConfig(model="gemini-2.5-flash")}
4453+
red_teaming_config: This field is experimental and may change in future
4454+
versions. Optional configuration for automated Agent Red Teaming
4455+
analysis. Specifies attack categories and vulnerable tools to
4456+
test. When provided, the server runs a red teaming pipeline
4457+
instead of standard evaluation metrics.
44294458
labels: The labels to apply to the evaluation run.
44304459
loss_analysis_metrics: This field is experimental and may change in future
44314460
versions. Optional list of metrics to run loss analysis on. The
@@ -4511,6 +4540,9 @@ async def create_evaluation_run(
45114540
loss_analysis_configs=loss_analysis_configs,
45124541
inference_configs=inference_configs,
45134542
)
4543+
resolved_analysis_configs = _evals_utils._resolve_red_teaming_config(
4544+
red_teaming_config
4545+
)
45144546
evaluation_config = types.EvaluationRunConfig(
45154547
output_config=output_config,
45164548
metrics=resolved_metrics,
@@ -4529,6 +4561,7 @@ async def create_evaluation_run(
45294561
data_source=resolved_dataset,
45304562
evaluation_config=evaluation_config,
45314563
inference_configs=resolved_inference_configs,
4564+
analysis_configs=resolved_analysis_configs,
45324565
labels=resolved_labels,
45334566
config=config,
45344567
)

vertexai/_genai/types/__init__.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,9 @@
181181
from .common import AggregatedMetricResult
182182
from .common import AggregatedMetricResultDict
183183
from .common import AggregatedMetricResultOrDict
184+
from .common import AnalysisConfig
185+
from .common import AnalysisConfigDict
186+
from .common import AnalysisConfigOrDict
184187
from .common import AppendAgentEngineSessionEventConfig
185188
from .common import AppendAgentEngineSessionEventConfigDict
186189
from .common import AppendAgentEngineSessionEventConfigOrDict
@@ -202,6 +205,9 @@
202205
from .common import AssessDatasetConfig
203206
from .common import AssessDatasetConfigDict
204207
from .common import AssessDatasetConfigOrDict
208+
from .common import AttackCategoryResult
209+
from .common import AttackCategoryResultDict
210+
from .common import AttackCategoryResultOrDict
205211
from .common import BatchPredictionResourceUsageAssessmentConfig
206212
from .common import BatchPredictionResourceUsageAssessmentConfigDict
207213
from .common import BatchPredictionResourceUsageAssessmentConfigOrDict
@@ -1063,6 +1069,12 @@
10631069
from .common import ReasoningEngineTrafficConfigTrafficSplitManualTarget
10641070
from .common import ReasoningEngineTrafficConfigTrafficSplitManualTargetDict
10651071
from .common import ReasoningEngineTrafficConfigTrafficSplitManualTargetOrDict
1072+
from .common import RedTeamingAnalysisConfig
1073+
from .common import RedTeamingAnalysisConfigDict
1074+
from .common import RedTeamingAnalysisConfigOrDict
1075+
from .common import RedTeamingAnalysisResult
1076+
from .common import RedTeamingAnalysisResultDict
1077+
from .common import RedTeamingAnalysisResultOrDict
10661078
from .common import ReservationAffinity
10671079
from .common import ReservationAffinityDict
10681080
from .common import ReservationAffinityOrDict
@@ -1440,6 +1452,9 @@
14401452
from .common import VertexBaseConfig
14411453
from .common import VertexBaseConfigDict
14421454
from .common import VertexBaseConfigOrDict
1455+
from .common import VulnerableTool
1456+
from .common import VulnerableToolDict
1457+
from .common import VulnerableToolOrDict
14431458
from .common import WinRateStats
14441459
from .common import WinRateStatsDict
14451460
from .common import WinRateStatsOrDict
@@ -1577,12 +1592,27 @@
15771592
"EvaluationRunInferenceConfig",
15781593
"EvaluationRunInferenceConfigDict",
15791594
"EvaluationRunInferenceConfigOrDict",
1595+
"VulnerableTool",
1596+
"VulnerableToolDict",
1597+
"VulnerableToolOrDict",
1598+
"RedTeamingAnalysisConfig",
1599+
"RedTeamingAnalysisConfigDict",
1600+
"RedTeamingAnalysisConfigOrDict",
1601+
"AnalysisConfig",
1602+
"AnalysisConfigDict",
1603+
"AnalysisConfigOrDict",
15801604
"CreateEvaluationRunConfig",
15811605
"CreateEvaluationRunConfigDict",
15821606
"CreateEvaluationRunConfigOrDict",
15831607
"SummaryMetric",
15841608
"SummaryMetricDict",
15851609
"SummaryMetricOrDict",
1610+
"AttackCategoryResult",
1611+
"AttackCategoryResultDict",
1612+
"AttackCategoryResultOrDict",
1613+
"RedTeamingAnalysisResult",
1614+
"RedTeamingAnalysisResultDict",
1615+
"RedTeamingAnalysisResultOrDict",
15861616
"LossTaxonomyEntry",
15871617
"LossTaxonomyEntryDict",
15881618
"LossTaxonomyEntryOrDict",

0 commit comments

Comments
 (0)