Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/docker-build-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ jobs:
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_TEST_SUITE: realistic_env_max_load
FORGE_TEST_SUITE: land_blocking
IMAGE_TAG: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_RUNNER_DURATION_SECS: 480
FORGE_RUNNER_DURATION_SECS: 600
COMMENT_HEADER: forge-e2e
# Use the cache ID as the Forge namespace so we can limit Forge test concurrency on k8s, since Forge
# test lifecycle is separate from that of GHA. This protects us from the case where many Forge tests are triggered
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
secrets: inherit
with:
GIT_SHA: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_TEST_SUITE: realistic_env_max_load
FORGE_TEST_SUITE: land_blocking
IMAGE_TAG: ${{ needs.determine-docker-build-metadata.outputs.gitSha }}
FORGE_RUNNER_DURATION_SECS: 480
FORGE_CLUSTER_NAME: ${{ inputs.FORGE_CLUSTER_NAME }}
Expand Down
9 changes: 6 additions & 3 deletions testsuite/forge-cli/src/suites/land_blocking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
// Licensed pursuant to the Innovation-Enabling Source Code License, available at https://github.com/aptos-labs/aptos-core/blob/main/LICENSE

use super::ungrouped::mixed_compatible_emit_job;
use crate::{suites::realistic_environment::realistic_env_max_load_test, TestCommand};
use crate::{
suites::realistic_environment::realistic_env_p90_latency_test,
TestCommand,
};
use aptos_forge::{success_criteria::SuccessCriteria, ForgeConfig};
use aptos_testcases::{
compatibility_test::SimpleValidatorUpgrade, framework_upgrade::FrameworkUpgrade,
Expand All @@ -16,8 +19,8 @@ pub(crate) fn get_land_blocking_test(
test_cmd: &TestCommand,
) -> Option<ForgeConfig> {
let test = match test_name {
"land_blocking" | "realistic_env_max_load" => {
realistic_env_max_load_test(duration, test_cmd, 7, 0, 3)
"land_blocking" | "realistic_env_max_load" | "realistic_env_p90_latency" => {
realistic_env_p90_latency_test()
},
"compat" => compat(),
"framework_upgrade" => framework_upgrade(),
Expand Down
44 changes: 43 additions & 1 deletion testsuite/forge-cli/src/suites/realistic_environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use aptos_sdk::types::on_chain_config::{
};
use aptos_testcases::{
load_vs_perf_benchmark::{LoadVsPerfBenchmark, TransactionWorkload, Workloads},
multi_region_network_test::MultiRegionNetworkEmulationTest,
multi_region_network_test::{MultiRegionNetworkEmulationConfig, MultiRegionNetworkEmulationTest},
performance_test::PerformanceBenchmark,
two_traffics_test::TwoTrafficsTest,
CompositeNetworkTest,
Expand Down Expand Up @@ -53,6 +53,7 @@ pub(crate) fn get_realistic_env_test(
"realistic_env_graceful_overload" => realistic_env_graceful_overload(duration),
"realistic_network_tuned_for_throughput" => realistic_network_tuned_for_throughput_test(),
"realistic_env_max_load_encrypted" => realistic_env_max_load_encrypted_test(duration),
"realistic_env_p90_latency" => realistic_env_p90_latency_test(),
_ => return None, // The test name does not match a realistic-env test
};
Some(test)
Expand Down Expand Up @@ -714,6 +715,47 @@ pub(crate) fn realistic_network_tuned_for_throughput_test() -> ForgeConfig {
forge_config
}

/// Builds the Forge config for the latency-focused realistic-environment test.
///
/// The swarm has 20 validators placed under four-region network emulation, using the EU-heavy
/// per-region peer counts produced by
/// `MultiRegionNetworkEmulationConfig::four_regions_mainnet_like`. Load is emitted at a constant
/// 3500 TPS and latency is polled every 100 ms.
///
/// The success criteria require no restarts, catch-up within 60 (presumably seconds), chain
/// progress per `RELIABLE_REAL_ENV_PROGRESS_THRESHOLD`, and P50/P90/P99 latency thresholds of
/// 1.5/2.5/4.0 (presumably seconds — confirm against `SuccessCriteria`). The `3000` passed to
/// `SuccessCriteria::new` is presumably the minimum average TPS.
pub(crate) fn realistic_env_p90_latency_test() -> ForgeConfig {
    let validator_count = 20;

    // Performance benchmark wrapped in the weighted four-region network emulation.
    let network_test = CompositeNetworkTest::new(
        MultiRegionNetworkEmulationTest::new_with_config(
            MultiRegionNetworkEmulationConfig::four_regions_mainnet_like(validator_count),
        ),
        PerformanceBenchmark,
    );

    // Fixed-rate load with frequent latency sampling.
    let emit_job = EmitJobRequest::default()
        .mode(EmitJobMode::ConstTps { tps: 3500 })
        .latency_polling_interval(Duration::from_millis(100));

    let success_criteria = SuccessCriteria::new(3000)
        .add_no_restarts()
        .add_wait_for_catchup_s(60)
        .add_latency_threshold(1.5, LatencyType::P50)
        .add_latency_threshold(2.5, LatencyType::P90)
        .add_latency_threshold(4.0, LatencyType::P99)
        .add_chain_progress(RELIABLE_REAL_ENV_PROGRESS_THRESHOLD.clone());

    ForgeConfig::default()
        .with_initial_validator_count(NonZeroUsize::new(validator_count).unwrap())
        .add_network_test(network_test)
        .with_emit_job(emit_job)
        // The closure stays inline so its parameter type is inferred from the expected argument.
        .with_genesis_helm_config_fn(Arc::new(|helm_values| {
            // A 24-hour epoch means no epoch change happens mid-run, keeping latency stable.
            helm_values["chain"]["epoch_duration_secs"] = (24 * 3600).into();
            // Pin the on-chain consensus and execution configs to their genesis defaults.
            helm_values["chain"]["on_chain_consensus_config"] =
                serde_yaml::to_value(OnChainConsensusConfig::default_for_genesis())
                    .expect("must serialize");
            helm_values["chain"]["on_chain_execution_config"] =
                serde_yaml::to_value(OnChainExecutionConfig::default_for_genesis())
                    .expect("must serialize");
        }))
        .with_success_criteria(success_criteria)
}

pub fn wrap_with_realistic_env<T: NetworkTest + 'static>(
num_validators: usize,
test: T,
Expand Down
116 changes: 85 additions & 31 deletions testsuite/testcases/src/multi_region_network_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,31 @@ pub(crate) fn chunk_peers(mut peers: Vec<Vec<PeerId>>, num_chunks: usize) -> Vec
chunks
}

/// Partitions `peers` into one flattened chunk per entry of `counts`, preserving order.
///
/// Every chunk except the last takes up to `count` peer groups (fewer if the input runs out).
/// The final chunk ignores its own count and takes every peer group that is still left, so no
/// peer is dropped as long as `counts` is non-empty. An empty `counts` yields no chunks.
fn chunk_peers_with_counts(peers: Vec<Vec<PeerId>>, counts: &[usize]) -> Vec<Vec<PeerId>> {
    // Index of the chunk that absorbs the remainder; irrelevant when `counts` is empty.
    let last_index = counts.len().saturating_sub(1);
    let mut remaining_groups = peers.into_iter();

    counts
        .iter()
        .enumerate()
        .map(|(index, &count)| {
            // The last chunk is unbounded so it swallows whatever is left.
            let limit = if index == last_index {
                usize::MAX
            } else {
                count
            };
            remaining_groups
                .by_ref()
                .take(limit)
                .flatten()
                .collect::<Vec<PeerId>>()
        })
        .collect()
}

/// Creates a table of peers grouped by region. The peers are divided into N groups, where N is the
/// number of regions provided in the link stats table. Any remaining peers are added to the first
/// group.
/// number of regions provided in the link stats table.
/// If `region_counts` is provided, each group gets exactly that many peers (last group absorbs any
/// remainder). Otherwise peers are distributed evenly across regions.
fn create_link_stats_table_with_peer_groups(
peers: Vec<Vec<PeerId>>,
link_stats_table: &LinkStatsTable,
region_counts: Option<&[usize]>,
) -> LinkStatsTableWithPeerGroups {
// Verify that we have enough grouped peers to simulate the link stats table
assert!(peers.len() >= link_stats_table.len());
Expand All @@ -85,7 +104,17 @@ fn create_link_stats_table_with_peer_groups(
);

// Create the link stats table with peer groups
let peer_chunks = chunk_peers(peers, number_of_regions);
let peer_chunks = match region_counts {
Some(counts) => {
assert_eq!(
counts.len(),
number_of_regions,
"region_counts length must match number of regions"
);
chunk_peers_with_counts(peers, counts)
},
None => chunk_peers(peers, number_of_regions),
};
peer_chunks
.into_iter()
.zip(link_stats_table.iter())
Expand Down Expand Up @@ -217,6 +246,8 @@ pub struct MultiRegionNetworkEmulationConfig {
pub link_stats_table: LinkStatsTable,
pub inter_region_config: InterRegionNetEmConfig,
pub intra_region_config: Option<IntraRegionNetEmConfig>,
/// Optional per-region peer counts. If None, peers are distributed evenly across regions.
pub region_counts: Option<Vec<usize>>,
}

impl Default for MultiRegionNetworkEmulationConfig {
Expand All @@ -225,6 +256,7 @@ impl Default for MultiRegionNetworkEmulationConfig {
link_stats_table: get_link_stats_table(FOUR_REGION_LINK_STATS),
inter_region_config: InterRegionNetEmConfig::default(),
intra_region_config: Some(IntraRegionNetEmConfig::default()),
region_counts: None,
}
}
}
Expand All @@ -250,6 +282,35 @@ impl MultiRegionNetworkEmulationConfig {
..Default::default()
}
}

/// A four-region config with an EU-heavy validator distribution modelled on mainnet:
/// 70% EU (split across two EU regions), 20% US East and 10% Asia.
///
/// The weights are listed in the lexicographic order of the region names, which is the order
/// the peer groups are expected to be matched against (TODO confirm against the link stats
/// table ordering):
///   "1-gcp--eu-west2"      (Netherlands / Ireland / UK) — 30%
///   "2-gcp--eu-west6"      (Germany / France / CH)      — 40%
///   "3-gcp--us-east4"      (US East / Canada)           — 20%
///   "4-gcp--as-southeast1" (Tokyo / Singapore)          — 10%
///
/// Asia is intentionally weighted above its mainnet share (a low single-digit percentage —
/// NOTE(review): confirm the exact figure) so that inter-continental tail latency is exercised
/// even with a small validator set.
///
/// Each region receives `num_validators * weight / 100` validators, rounded down; the few
/// validators lost to rounding are handed out one each starting from the first region. If that
/// leaves a region empty while there are at least as many validators as regions, one validator
/// is moved from the currently largest region into each empty one, so every region is
/// populated. Distributions in which no region is empty (10 or more validators) are unaffected
/// by this last step.
pub fn four_regions_mainnet_like(num_validators: usize) -> Self {
    // Percent weights, in the same lexicographic order as the region names.
    const REGION_WEIGHTS: [usize; 4] = [30, 40, 20, 10];
    let total_weight: usize = REGION_WEIGHTS.iter().sum();

    let mut counts: Vec<usize> = REGION_WEIGHTS
        .iter()
        .map(|&weight| num_validators * weight / total_weight)
        .collect();

    // Flooring never over-allocates, so this subtraction cannot underflow. The leftover is
    // distributed round-robin starting from the first region.
    let allocated: usize = counts.iter().sum();
    let leftover = num_validators - allocated;
    for index in 0..leftover {
        counts[index % REGION_WEIGHTS.len()] += 1;
    }

    // With few validators the low-weight regions can round down to zero, which would leave
    // them without any peers. Whenever there are enough validators to go around, top up each
    // empty region from the largest one. An empty region implies some other region holds at
    // least two validators, so the donor never becomes empty and the loop terminates.
    if num_validators >= counts.len() {
        while let Some(empty_region) = counts.iter().position(|&count| count == 0) {
            let largest_region = (0..counts.len())
                .max_by_key(|&region| counts[region])
                .expect("the region list is never empty");
            counts[largest_region] -= 1;
            counts[empty_region] += 1;
        }
    }

    Self {
        link_stats_table: get_link_stats_table(FOUR_REGION_LINK_STATS),
        region_counts: Some(counts),
        ..Default::default()
    }
}
}

/// A test to emulate network conditions for a multi-region setup.
Expand Down Expand Up @@ -327,6 +388,7 @@ pub fn create_multi_region_swarm_network_chaos(
let peer_groups = create_link_stats_table_with_peer_groups(
all_peers,
&network_emulation_config.link_stats_table,
network_emulation_config.region_counts.as_deref(),
);

// Create the inter and intra network emulation configs
Expand Down Expand Up @@ -385,42 +447,34 @@ mod tests {
fn test_create_multi_region_swarm_network_chaos() {
aptos_logger::Logger::new().init();

// Create a config with 8 peers and multiple regions
// Default config: four regions, with intra-region netem.
// 4 intra-region + C(4,2)*2 inter-region = 4 + 12 = 16 group netems.

// Create a config with 8 peers across 4 regions (2 per region)
let all_peers: Vec<_> = (0..8).map(|_| vec![PeerId::random()]).collect();
let netem = create_multi_region_swarm_network_chaos(all_peers, None);
assert_eq!(netem.group_netems.len(), 16);

// Verify the number of group netems
assert_eq!(netem.group_netems.len(), 10);

// Create a config with 10 peers and multiple regions
let all_peers: Vec<_> = (0..10).map(|_| vec![PeerId::random()]).collect();
// Create a config with 12 peers across 4 regions (3 per region)
let all_peers: Vec<_> = (0..12).map(|_| vec![PeerId::random()]).collect();
let netem = create_multi_region_swarm_network_chaos(all_peers.clone(), None);
assert_eq!(netem.group_netems.len(), 16);

// Verify the resulting group netems
assert_eq!(netem.group_netems.len(), 10);
assert_eq!(netem.group_netems[0].source_nodes.len(), 4);
assert_eq!(netem.group_netems[0].target_nodes.len(), 4);
// Intra-region netems come first (sorted by BTreeMap key order).
// First region lexicographically is "1-gcp--eu-west2".
assert_eq!(netem.group_netems[0].source_nodes.len(), 3);
assert_eq!(netem.group_netems[0].target_nodes.len(), 3);
assert_eq!(netem.group_netems[0], GroupNetEm {
name: "aws--ap-northeast-1-self-netem".to_owned(),
name: "1-gcp--eu-west2-self-netem".to_owned(),
rate_in_mbps: 10000,
source_nodes: vec![
all_peers[0][0],
all_peers[1][0],
all_peers[8][0],
all_peers[9][0],
],
target_nodes: vec![
all_peers[0][0],
all_peers[1][0],
all_peers[8][0],
all_peers[9][0],
],
delay_latency_ms: 50,
delay_jitter_ms: 5,
delay_correlation_percentage: 50,
source_nodes: vec![all_peers[0][0], all_peers[1][0], all_peers[2][0]],
target_nodes: vec![all_peers[0][0], all_peers[1][0], all_peers[2][0]],
delay_latency_ms: 20,
delay_jitter_ms: 0,
delay_correlation_percentage: 20,
loss_percentage: 1,
loss_correlation_percentage: 50
})
loss_correlation_percentage: 20,
});
}

#[test]
Expand Down
Loading