From cfe642ca9a816829f214415355e5e85839f61d0b Mon Sep 17 00:00:00 2001 From: danielxiangzl Date: Wed, 1 Apr 2026 19:18:49 -0700 Subject: [PATCH 1/2] [forge] Add mainnet-like P90 latency test as land blocking default Replaces the land_blocking forge suite with a latency-focused test that uses a mainnet-representative validator distribution (~70% EU, ~20% NA, ~10% Asia) instead of the previous even 25%/25%/25%/25% four-region split. The even split over-weights Asia (25% vs ~2% on mainnet) and under-weights EU, making P90 thresholds misleading. The new distribution causes EU proposers to dominate rounds as they do on mainnet, exercising the actual latency bottlenecks (distant proposers racing with EU batch arrival). Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/docker-build-test.yaml | 2 +- .../forge-continuous-land-blocking-test.yaml | 2 +- .../forge-cli/src/suites/land_blocking.rs | 9 +- .../src/suites/realistic_environment.rs | 44 ++++++- .../src/multi_region_network_test.rs | 116 +++++++++++++----- 5 files changed, 136 insertions(+), 37 deletions(-) diff --git a/.github/workflows/docker-build-test.yaml b/.github/workflows/docker-build-test.yaml index fe5454e26fd..61e44b1b3bd 100644 --- a/.github/workflows/docker-build-test.yaml +++ b/.github/workflows/docker-build-test.yaml @@ -268,7 +268,7 @@ jobs: secrets: inherit with: GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }} - FORGE_TEST_SUITE: realistic_env_max_load + FORGE_TEST_SUITE: land_blocking IMAGE_TAG: ${{ needs.determine-docker-build-metadata.outputs.gitSha }} FORGE_RUNNER_DURATION_SECS: 480 COMMENT_HEADER: forge-e2e diff --git a/.github/workflows/forge-continuous-land-blocking-test.yaml b/.github/workflows/forge-continuous-land-blocking-test.yaml index e91057e0dd0..b5414a4daba 100644 --- a/.github/workflows/forge-continuous-land-blocking-test.yaml +++ b/.github/workflows/forge-continuous-land-blocking-test.yaml @@ -86,7 +86,7 @@ jobs: secrets: inherit with: GIT_SHA: ${{ 
needs.determine-docker-build-metadata.outputs.gitSha }} - FORGE_TEST_SUITE: realistic_env_max_load + FORGE_TEST_SUITE: land_blocking IMAGE_TAG: ${{ needs.determine-docker-build-metadata.outputs.gitSha }} FORGE_RUNNER_DURATION_SECS: 480 FORGE_CLUSTER_NAME: ${{ inputs.FORGE_CLUSTER_NAME }} diff --git a/testsuite/forge-cli/src/suites/land_blocking.rs b/testsuite/forge-cli/src/suites/land_blocking.rs index 0f9ac0fac1b..b005367bda3 100644 --- a/testsuite/forge-cli/src/suites/land_blocking.rs +++ b/testsuite/forge-cli/src/suites/land_blocking.rs @@ -2,7 +2,10 @@ // Licensed pursuant to the Innovation-Enabling Source Code License, available at https://github.com/aptos-labs/aptos-core/blob/main/LICENSE use super::ungrouped::mixed_compatible_emit_job; -use crate::{suites::realistic_environment::realistic_env_max_load_test, TestCommand}; +use crate::{ + suites::realistic_environment::realistic_env_p90_latency_test, + TestCommand, +}; use aptos_forge::{success_criteria::SuccessCriteria, ForgeConfig}; use aptos_testcases::{ compatibility_test::SimpleValidatorUpgrade, framework_upgrade::FrameworkUpgrade, @@ -16,8 +19,8 @@ pub(crate) fn get_land_blocking_test( test_cmd: &TestCommand, ) -> Option { let test = match test_name { - "land_blocking" | "realistic_env_max_load" => { - realistic_env_max_load_test(duration, test_cmd, 7, 0, 3) + "land_blocking" | "realistic_env_max_load" | "realistic_env_p90_latency" => { + realistic_env_p90_latency_test() }, "compat" => compat(), "framework_upgrade" => framework_upgrade(), diff --git a/testsuite/forge-cli/src/suites/realistic_environment.rs b/testsuite/forge-cli/src/suites/realistic_environment.rs index beab2f2073d..e25e6d92857 100644 --- a/testsuite/forge-cli/src/suites/realistic_environment.rs +++ b/testsuite/forge-cli/src/suites/realistic_environment.rs @@ -24,7 +24,7 @@ use aptos_sdk::types::on_chain_config::{ }; use aptos_testcases::{ load_vs_perf_benchmark::{LoadVsPerfBenchmark, TransactionWorkload, Workloads}, - 
multi_region_network_test::MultiRegionNetworkEmulationTest, + multi_region_network_test::{MultiRegionNetworkEmulationConfig, MultiRegionNetworkEmulationTest}, performance_test::PerformanceBenchmark, two_traffics_test::TwoTrafficsTest, CompositeNetworkTest, @@ -53,6 +53,7 @@ pub(crate) fn get_realistic_env_test( "realistic_env_graceful_overload" => realistic_env_graceful_overload(duration), "realistic_network_tuned_for_throughput" => realistic_network_tuned_for_throughput_test(), "realistic_env_max_load_encrypted" => realistic_env_max_load_encrypted_test(duration), + "realistic_env_p90_latency" => realistic_env_p90_latency_test(), _ => return None, // The test name does not match a realistic-env test }; Some(test) @@ -714,6 +715,47 @@ pub(crate) fn realistic_network_tuned_for_throughput_test() -> ForgeConfig { forge_config } +/// A latency-focused test that runs at a moderate TPS with a mainnet-like validator distribution: +/// ~70% EU (split across two EU regions), ~20% US East, and ~10% Asia. The geographic bias +/// matches real mainnet topology so that the P90 latency thresholds are meaningful; with an even +/// four-region split the test would under-weight EU and over-weight Asia relative to mainnet. +pub(crate) fn realistic_env_p90_latency_test() -> ForgeConfig { + let num_validators = 20; + + ForgeConfig::default() + .with_initial_validator_count(NonZeroUsize::new(num_validators).unwrap()) + .add_network_test(CompositeNetworkTest::new( + MultiRegionNetworkEmulationTest::new_with_config( + MultiRegionNetworkEmulationConfig::four_regions_mainnet_like(num_validators), + ), + PerformanceBenchmark, + )) + .with_emit_job( + EmitJobRequest::default() + .mode(EmitJobMode::ConstTps { tps: 3500 }) + .latency_polling_interval(Duration::from_millis(100)), + ) + .with_genesis_helm_config_fn(Arc::new(|helm_values| { + // No epoch change so latency measurements are stable. 
+ helm_values["chain"]["epoch_duration_secs"] = (24 * 3600).into(); + helm_values["chain"]["on_chain_consensus_config"] = + serde_yaml::to_value(OnChainConsensusConfig::default_for_genesis()) + .expect("must serialize"); + helm_values["chain"]["on_chain_execution_config"] = + serde_yaml::to_value(OnChainExecutionConfig::default_for_genesis()) + .expect("must serialize"); + })) + .with_success_criteria( + SuccessCriteria::new(3000) + .add_no_restarts() + .add_wait_for_catchup_s(60) + .add_latency_threshold(1.5, LatencyType::P50) + .add_latency_threshold(2.5, LatencyType::P90) + .add_latency_threshold(4.0, LatencyType::P99) + .add_chain_progress(RELIABLE_REAL_ENV_PROGRESS_THRESHOLD.clone()), + ) +} + pub fn wrap_with_realistic_env( num_validators: usize, test: T, diff --git a/testsuite/testcases/src/multi_region_network_test.rs b/testsuite/testcases/src/multi_region_network_test.rs index 10b87a9b6c9..ca745f89fcc 100644 --- a/testsuite/testcases/src/multi_region_network_test.rs +++ b/testsuite/testcases/src/multi_region_network_test.rs @@ -63,12 +63,31 @@ pub(crate) fn chunk_peers(mut peers: Vec>, num_chunks: usize) -> Vec chunks } +/// Splits peers into chunks with the given exact counts. The last chunk absorbs any remaining +/// peers not accounted for by the counts. +fn chunk_peers_with_counts(mut peers: Vec>, counts: &[usize]) -> Vec> { + let mut chunks = vec![]; + for (i, &count) in counts.iter().enumerate() { + let take = if i == counts.len() - 1 { + peers.len() + } else { + count.min(peers.len()) + }; + let remaining = peers.split_off(take); + chunks.push(peers.iter().flatten().cloned().collect()); + peers = remaining; + } + chunks +} + /// Creates a table of peers grouped by region. The peers are divided into N groups, where N is the -/// number of regions provided in the link stats table. Any remaining peers are added to the first -/// group. +/// number of regions provided in the link stats table. 
+/// If `region_counts` is provided, each group gets exactly that many peers (last group absorbs any +/// remainder). Otherwise peers are distributed evenly across regions. fn create_link_stats_table_with_peer_groups( peers: Vec>, link_stats_table: &LinkStatsTable, + region_counts: Option<&[usize]>, ) -> LinkStatsTableWithPeerGroups { // Verify that we have enough grouped peers to simulate the link stats table assert!(peers.len() >= link_stats_table.len()); @@ -85,7 +104,17 @@ fn create_link_stats_table_with_peer_groups( ); // Create the link stats table with peer groups - let peer_chunks = chunk_peers(peers, number_of_regions); + let peer_chunks = match region_counts { + Some(counts) => { + assert_eq!( + counts.len(), + number_of_regions, + "region_counts length must match number of regions" + ); + chunk_peers_with_counts(peers, counts) + }, + None => chunk_peers(peers, number_of_regions), + }; peer_chunks .into_iter() .zip(link_stats_table.iter()) @@ -217,6 +246,8 @@ pub struct MultiRegionNetworkEmulationConfig { pub link_stats_table: LinkStatsTable, pub inter_region_config: InterRegionNetEmConfig, pub intra_region_config: Option, + /// Optional per-region peer counts. If None, peers are distributed evenly across regions. + pub region_counts: Option>, } impl Default for MultiRegionNetworkEmulationConfig { @@ -225,6 +256,7 @@ impl Default for MultiRegionNetworkEmulationConfig { link_stats_table: get_link_stats_table(FOUR_REGION_LINK_STATS), inter_region_config: InterRegionNetEmConfig::default(), intra_region_config: Some(IntraRegionNetEmConfig::default()), + region_counts: None, } } } @@ -250,6 +282,35 @@ impl MultiRegionNetworkEmulationConfig { ..Default::default() } } + + /// A four-region config that reflects the mainnet validator distribution: + /// ~70% EU, ~20% North America, ~10% Asia. 
The regions in the CSV are sorted + /// lexicographically, so the weights correspond to: + /// "1-gcp--eu-west2" (Netherlands / Ireland / UK) — 30% + /// "2-gcp--eu-west6" (Germany / France / CH) — 40% + /// "3-gcp--us-east4" (US East / Canada) — 20% + /// "4-gcp--as-southeast1" (Tokyo / Singapore) — 10% + /// Asia is intentionally over-represented relative to mainnet (2%) so that + /// inter-continental tail latency is exercised even with a small validator set. + pub fn four_regions_mainnet_like(num_validators: usize) -> Self { + // Weights in the same lexicographic order as the BTreeMap keys in the CSV. + let weights = [30usize, 40, 20, 10]; + let total_weight: usize = weights.iter().sum(); + let mut counts: Vec = weights + .iter() + .map(|&w| num_validators * w / total_weight) + .collect(); + // Distribute any integer-division remainder to front regions. + let allocated: usize = counts.iter().sum(); + for i in 0..(num_validators - allocated) { + counts[i % weights.len()] += 1; + } + Self { + link_stats_table: get_link_stats_table(FOUR_REGION_LINK_STATS), + region_counts: Some(counts), + ..Default::default() + } + } } /// A test to emulate network conditions for a multi-region setup. @@ -327,6 +388,7 @@ pub fn create_multi_region_swarm_network_chaos( let peer_groups = create_link_stats_table_with_peer_groups( all_peers, &network_emulation_config.link_stats_table, + network_emulation_config.region_counts.as_deref(), ); // Create the inter and intra network emulation configs @@ -385,42 +447,34 @@ mod tests { fn test_create_multi_region_swarm_network_chaos() { aptos_logger::Logger::new().init(); - // Create a config with 8 peers and multiple regions + // Default config: four regions, with intra-region netem. + // 4 intra-region + C(4,2)*2 inter-region = 4 + 12 = 16 group netems. 
+ + // Create a config with 8 peers across 4 regions (2 per region) let all_peers: Vec<_> = (0..8).map(|_| vec![PeerId::random()]).collect(); let netem = create_multi_region_swarm_network_chaos(all_peers, None); + assert_eq!(netem.group_netems.len(), 16); - // Verify the number of group netems - assert_eq!(netem.group_netems.len(), 10); - - // Create a config with 10 peers and multiple regions - let all_peers: Vec<_> = (0..10).map(|_| vec![PeerId::random()]).collect(); + // Create a config with 12 peers across 4 regions (3 per region) + let all_peers: Vec<_> = (0..12).map(|_| vec![PeerId::random()]).collect(); let netem = create_multi_region_swarm_network_chaos(all_peers.clone(), None); + assert_eq!(netem.group_netems.len(), 16); - // Verify the resulting group netems - assert_eq!(netem.group_netems.len(), 10); - assert_eq!(netem.group_netems[0].source_nodes.len(), 4); - assert_eq!(netem.group_netems[0].target_nodes.len(), 4); + // Intra-region netems come first (sorted by BTreeMap key order). + // First region lexicographically is "1-gcp--eu-west2". 
+ assert_eq!(netem.group_netems[0].source_nodes.len(), 3); + assert_eq!(netem.group_netems[0].target_nodes.len(), 3); assert_eq!(netem.group_netems[0], GroupNetEm { - name: "aws--ap-northeast-1-self-netem".to_owned(), + name: "1-gcp--eu-west2-self-netem".to_owned(), rate_in_mbps: 10000, - source_nodes: vec![ - all_peers[0][0], - all_peers[1][0], - all_peers[8][0], - all_peers[9][0], - ], - target_nodes: vec![ - all_peers[0][0], - all_peers[1][0], - all_peers[8][0], - all_peers[9][0], - ], - delay_latency_ms: 50, - delay_jitter_ms: 5, - delay_correlation_percentage: 50, + source_nodes: vec![all_peers[0][0], all_peers[1][0], all_peers[2][0]], + target_nodes: vec![all_peers[0][0], all_peers[1][0], all_peers[2][0]], + delay_latency_ms: 20, + delay_jitter_ms: 0, + delay_correlation_percentage: 20, loss_percentage: 1, - loss_correlation_percentage: 50 - }) + loss_correlation_percentage: 20, + }); } #[test] From 825c202056fc22720d13fc2cfd68722f16de2a92 Mon Sep 17 00:00:00 2001 From: danielxiangzl Date: Thu, 2 Apr 2026 19:55:46 -0700 Subject: [PATCH 2/2] [ci] Increase forge land_blocking duration to 600s for P90 latency test Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/docker-build-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build-test.yaml b/.github/workflows/docker-build-test.yaml index 61e44b1b3bd..128529f383d 100644 --- a/.github/workflows/docker-build-test.yaml +++ b/.github/workflows/docker-build-test.yaml @@ -270,7 +270,7 @@ jobs: GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }} FORGE_TEST_SUITE: land_blocking IMAGE_TAG: ${{ needs.determine-docker-build-metadata.outputs.gitSha }} - FORGE_RUNNER_DURATION_SECS: 480 + FORGE_RUNNER_DURATION_SECS: 600 COMMENT_HEADER: forge-e2e # Use the cache ID as the Forge namespace so we can limit Forge test concurrency on k8s, since Forge # test lifecycle is separate from that of GHA. 
This protects us from the case where many Forge tests are triggered