|
18 | 18 | # fsync/shared_buffers, pgbench TPC-B write workload, best-of-N for |
19 | 19 | # stability, ratio-based so the runner's absolute speed cancels. |
20 | 20 | # |
| 21 | +# Additional report-only measurement: |
| 22 | +# two-node peer-online single-writer write tax |
| 23 | +# |
| 24 | +# This boots a real ClusterPair (strict quorum + shared_data) and measures |
| 25 | +# TPC-B writes on node0 while node1 is connected/in quorum. It is a value |
| 26 | +# report only: no percentage threshold is asserted here. |
| 27 | +# |
21 | 28 | # SEPARATE capability limitation (does NOT cover or excuse the gate above): |
22 | 29 | # true concurrent multi-node shared-block write competition is bounded by |
23 | 30 | # cross-node holder migration (DRM = Stage 6; spec-5.57 cross-instance |
|
39 | 46 |
|
40 | 47 | use PostgreSQL::Test::Cluster; |
41 | 48 | use PostgreSQL::Test::Utils; |
| 49 | +use PostgreSQL::Test::ClusterPair; |
42 | 50 | use PostgreSQL::Test::Stage5IntegratedAcceptanceReport; |
43 | 51 | use Test::More; |
44 | 52 |
|
|
50 | 58 | my $SECS = $ENV{PGRAC_PGBENCH_SECS} // 8; |
51 | 59 | my $CLIENTS = 4; |
52 | 60 | my $ROUNDS = $ENV{PGRAC_PGBENCH_ROUNDS} // 7; # interleaved rounds |
| 61 | +my $TWO_NODE_ROUNDS = $ENV{PGRAC_2NODE_PGBENCH_ROUNDS} // $ROUNDS; |
53 | 62 | # The hard gate: cluster write tax must not exceed this percentage. |
54 | 63 | my $GATE_PCT = $ENV{PGRAC_WRITE_TAX_GATE_PCT} // 10.0; |
55 | 64 |
|
@@ -79,6 +88,23 @@ sub median |
79 | 88 | return $s[int((@s) / 2)]; |
80 | 89 | } |
81 | 90 |
|
# Poll a single-value SQL query on a node until it returns an expected value.
#
#   $node       object providing safe_psql(dbname, sql)
#   $sql        query text, run against the 'postgres' database
#   $want       expected result; compared as a string with eq
#   $timeout_s  polling budget in seconds (defaults to 15)
#
# Returns 1 as soon as the query result equals $want.  When the budget is
# exhausted, emits a diag() naming the last observed value (or the last query
# error) and returns 0.  The query is always attempted at least once, even
# when $timeout_s is 0.
sub poll_sql_eq
{
	my ($node, $sql, $want, $timeout_s) = @_;
	$timeout_s //= 15;
	my $deadline = time + $timeout_s;
	my $last;

	# Query first and test the deadline afterwards: time() has one-second
	# resolution, so checking the deadline up front would skip the query
	# entirely for a zero timeout.
	while (1)
	{
		# A query that dies is treated as "not ready yet", but its error is
		# kept so a timeout can say why the value was never seen.
		my $got = eval { $node->safe_psql('postgres', $sql); };
		my $err = $@;

		if (defined $got)
		{
			return 1 if $got eq $want;
			$last = $got;
		}
		else
		{
			$err = '' unless defined $err;
			$err =~ s/\s+\z//;
			$last = length($err) ? "(error: $err)" : '(undef)';
		}

		last if time >= $deadline;

		# Four-argument select with no handles is a sub-second sleep.
		select(undef, undef, undef, 0.25);
	}

	diag("poll_sql_eq timeout after ${timeout_s}s: want='$want' last='$last' sql=$sql");
	return 0;
}
| 107 | + |
82 | 108 | # Common perf-isolation knobs (CPU-overhead measurement: fsync off removes |
83 | 109 | # disk variance so the gate measures the cluster machinery's added work). |
84 | 110 | my @perf_conf = ( |
@@ -149,6 +175,16 @@ sub median |
149 | 175 | note(" cluster TPC-B median tps = " . (defined $tps_cluster ? sprintf('%.0f', $tps_cluster) : 'n/a')); |
150 | 176 | note(" write tax % (median) = $tax_s (gate: <= $GATE_PCT%)"); |
151 | 177 |
|
| 178 | +# Surface the measured median to the captured CI log unconditionally: note() |
| 179 | +# is swallowed by non-verbose prove, but diag() reaches the log even on PASS. |
| 180 | +# This makes the gate's headroom (e.g. spec-5.19 MG-D v3 WAL delta effect) |
| 181 | +# visible without re-running the shard verbose. |
| 182 | +diag(sprintf("MG-B single-node write tax (median of %d rounds) = %s%% " |
| 183 | + . "(gate <= %s%%; native=%s cluster=%s median tps)", |
| 184 | + scalar(@taxes), $tax_s, $GATE_PCT, |
| 185 | + (defined $tps_native ? sprintf('%.0f', $tps_native) : 'n/a'), |
| 186 | + (defined $tps_cluster ? sprintf('%.0f', $tps_cluster) : 'n/a'))); |
| 187 | + |
152 | 188 | ok($have_both, |
153 | 189 | "M0 native + cluster single-node throughput measured over " |
154 | 190 | . scalar(@taxes) . " interleaved rounds"); |
@@ -181,6 +217,94 @@ sub median |
181 | 217 | ok($wait_events_present > 0, |
182 | 218 | "M2 cluster write-path wait-event surface present ($wait_events_present rows)"); |
183 | 219 |
|
# ---------------------------------------------------------------------
# M3: two-node peer-online write tax — REPORT ONLY.
#
# This is deliberately NOT a hard gate. It measures the current two-node
# online shape that Stage 5 can soundly run: strict-quorum ClusterPair with
# shared_data, node0 executing TPC-B writes while node1 is connected and in
# quorum. True concurrent dual-writer shared-block competition remains the
# separate DRM/Stage-6 limitation recorded below.
# ---------------------------------------------------------------------
my @two_node_tps;
my $two_node_started = 0;
my $two_node_ready = 0;

# Held outside the eval so the pair can still be shut down when a step inside
# the eval dies part-way through (boot, peer wait, pgbench init or a round).
# It is reset to undef once the pair has been stopped cleanly.
my $two_node_pair;

eval {
	# Reuse the shared perf settings, with any trailing newline removed from
	# each line before it is passed as extra_conf.
	my @pair_perf_conf = map { my $line = $_; chomp $line; $line } @perf_conf;
	$two_node_pair = PostgreSQL::Test::ClusterPair->new_pair(
		'mnw_pair',
		quorum_voting_disks => 3,
		shared_data => 1,
		extra_conf => [
			@pair_perf_conf,
			'cluster.quorum_poll_interval_ms = 500',
			'cluster.cssd_heartbeat_interval_ms = 2000',
			'cluster.cssd_dead_deadband_factor = 10',
		]);
	$two_node_pair->start_pair;
	$two_node_started = 1;

	# Ready means: each node sees the other as connected, and both nodes
	# report in_quorum = 't'.
	$two_node_ready =
	     $two_node_pair->wait_for_peer_state(0, 1, 'connected', 30)
	  && $two_node_pair->wait_for_peer_state(1, 0, 'connected', 30)
	  && poll_sql_eq($two_node_pair->node0, 'SELECT in_quorum FROM pg_cluster_quorum_state', 't', 20)
	  && poll_sql_eq($two_node_pair->node1, 'SELECT in_quorum FROM pg_cluster_quorum_state', 't', 20);

	if ($two_node_ready && pgbench_init($two_node_pair->node0))
	{
		for my $r (1 .. $TWO_NODE_ROUNDS)
		{
			my $t = pgbench_one($two_node_pair->node0);
			# Rounds that produced no usable tps are left out of the median.
			next unless defined $t && $t > 0;
			push @two_node_tps, $t;
			note(sprintf(" two-node round %d: node0 tps=%.0f", $r, $t));
		}
	}
	$two_node_pair->stop_pair;
	$two_node_pair = undef;
	1;
} or do {
	my $err = $@ || 'unknown error';
	diag("M3 two-node report-only measurement failed before completion: $err");
};

# Failure path only: the eval died before the pair was stopped.  Shut it down
# best-effort so it does not keep running for the rest of the script; a
# cleanup failure is reported but never fails this report-only leg.
if (defined $two_node_pair)
{
	eval { $two_node_pair->stop_pair; 1; } or do {
		my $stop_err = $@ || 'unknown error';
		diag("M3 two-node pair cleanup after failure did not complete: $stop_err");
	};
	$two_node_pair = undef;
}

# The tax is only computed when the pair booted, reached quorum, produced at
# least one round, and a positive native baseline exists to divide by.
my $two_have = ($two_node_started && $two_node_ready && scalar(@two_node_tps) > 0
	  && defined $tps_native && $tps_native > 0);
my $two_tps = scalar(@two_node_tps) ? median(@two_node_tps) : undef;
my $two_tax = ($two_have && defined $two_tps)
  ? 100.0 * (1.0 - $two_tps / $tps_native) : undef;
my $two_tax_s = defined $two_tax ? sprintf('%.2f', $two_tax) : 'n/a';

note("MG-B two-node peer-online write-path REPORT-ONLY measurement:");
note(" native single-node TPC-B median tps = "
	  . (defined $tps_native ? sprintf('%.0f', $tps_native) : 'n/a'));
note(" two-node peer-online node0 TPC-B median tps = "
	  . (defined $two_tps ? sprintf('%.0f', $two_tps) : 'n/a'));
note(" two-node write tax % (report-only) = $two_tax_s");

# REPORT ONLY: this leg must never fail the single-node hard gate. If the
# 2-node ClusterPair could not boot / reach quorum / produce a number this run
# (transient runner shmem pressure, etc.), pass with an explicit unavailable
# note rather than failing -- the HARD gate is the single-node M1 tax only.
if ($two_have)
{
	diag("MG-B two-node peer-online write tax (report-only) = ${two_tax_s}%");
	ok(1,
		"M3 two-node peer-online single-writer write tax measured: ${two_tax_s}% "
		  . "(REPORT ONLY; no threshold asserted)");
}
else
{
	ok(1,
		"M3 two-node peer-online write tax unavailable this run "
		  . "(REPORT ONLY; never fails the single-node hard gate)");
}

# Always record the leg; missing tps values are recorded as 0 and a missing
# tax as the string 'n/a'.
$report->record_multinode_write_value(2, 'tpcb-peer-online-single-writer',
	tps_native => (defined $tps_native ? $tps_native : 0),
	tps_cluster => (defined $two_tps ? $two_tps : 0),
	write_tax_pct => $two_tax_s,
	gate => 'REPORT-ONLY',
	note => 'ClusterPair strict-quorum + shared_data; node0 writes while node1 '
	  . 'is connected/in quorum. No threshold asserted.');
| 307 | + |
184 | 308 | # --------------------------------------------------------------------- |
185 | 309 | # SOUNDNESS — the single-node tax above is REAL + gated. The TRUE concurrent |
186 | 310 | # multi-node shared-block write limit is a SEPARATE capability limitation that |
|
0 commit comments