Skip to content

Commit b64e8c6

Browse files
author
SqlRush
committed
test(cluster): spec-5.19 MG-B — t/328 2-node report-only leg + always-logged median
- M3: two-node peer-online single-writer write-tax measurement (real ClusterPair, strict quorum + shared_data; node0 writes while node1 is in quorum). REPORT ONLY: never asserts a threshold and never fails the single-node hard gate -- if the ClusterPair cannot boot/quorum/measure this run it passes with an explicit "unavailable" note. Addresses the 2-node write-path question without weakening the M1 single-node gate. - M1: emit the measured median write tax via diag() (reaches the CI log even on PASS; note() is swallowed by non-verbose prove) so the gate's headroom is visible without re-running the shard verbose. The HARD gate stays the single-node M1 tax <= 10% (rule 8.B).
1 parent 7e92253 commit b64e8c6

1 file changed

Lines changed: 124 additions & 0 deletions

File tree

src/test/cluster_tap/t/328_stage5_multinode_write_perf.pl

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@
1818
# fsync/shared_buffers, pgbench TPC-B write workload, best-of-N for
1919
# stability, ratio-based so the runner's absolute speed cancels.
2020
#
21+
# Additional report-only measurement:
22+
# two-node peer-online single-writer write tax
23+
#
24+
# This boots a real ClusterPair (strict quorum + shared_data) and measures
25+
# TPC-B writes on node0 while node1 is connected/in quorum. It is a value
26+
# report only: no percentage threshold is asserted here.
27+
#
2128
# SEPARATE capability limitation (does NOT cover or excuse the gate above):
2229
# true concurrent multi-node shared-block write competition is bounded by
2330
# cross-node holder migration (DRM = Stage 6; spec-5.57 cross-instance
@@ -39,6 +46,7 @@
3946

4047
use PostgreSQL::Test::Cluster;
4148
use PostgreSQL::Test::Utils;
49+
use PostgreSQL::Test::ClusterPair;
4250
use PostgreSQL::Test::Stage5IntegratedAcceptanceReport;
4351
use Test::More;
4452

@@ -50,6 +58,7 @@
5058
my $SECS = $ENV{PGRAC_PGBENCH_SECS} // 8;
5159
my $CLIENTS = 4;
5260
my $ROUNDS = $ENV{PGRAC_PGBENCH_ROUNDS} // 7; # interleaved rounds
61+
my $TWO_NODE_ROUNDS = $ENV{PGRAC_2NODE_PGBENCH_ROUNDS} // $ROUNDS;
5362
# The hard gate: cluster write tax must not exceed this percentage.
5463
my $GATE_PCT = $ENV{PGRAC_WRITE_TAX_GATE_PCT} // 10.0;
5564

@@ -79,6 +88,23 @@ sub median
7988
return $s[int((@s) / 2)];
8089
}
8190

91+
sub poll_sql_eq
92+
{
93+
my ($node, $sql, $want, $timeout_s) = @_;
94+
$timeout_s //= 15;
95+
my $deadline = time + $timeout_s;
96+
my $last = '(never-queried)';
97+
while (time < $deadline)
98+
{
99+
my $got = eval { $node->safe_psql('postgres', $sql); };
100+
$last = defined $got ? $got : '(undef)';
101+
return 1 if defined $got && $got eq $want;
102+
select(undef, undef, undef, 0.25);
103+
}
104+
diag("poll_sql_eq timeout after ${timeout_s}s: want='$want' last='$last' sql=$sql");
105+
return 0;
106+
}
107+
82108
# Common perf-isolation knobs (CPU-overhead measurement: fsync off removes
83109
# disk variance so the gate measures the cluster machinery's added work).
84110
my @perf_conf = (
@@ -149,6 +175,16 @@ sub median
149175
note(" cluster TPC-B median tps = " . (defined $tps_cluster ? sprintf('%.0f', $tps_cluster) : 'n/a'));
150176
note(" write tax % (median) = $tax_s (gate: <= $GATE_PCT%)");
151177

178+
# Always log the measured single-node median write tax.  note() output is
# hidden when prove runs non-verbose, whereas diag() reaches the captured CI
# log even when the test passes, so the gate's headroom (e.g. the effect of
# the spec-5.19 MG-D v3 WAL delta) can be read without re-running the shard
# in verbose mode.
{
    # Render each throughput as a whole number, or 'n/a' when unmeasured.
    my $native_tps_s =
      defined $tps_native ? sprintf('%.0f', $tps_native) : 'n/a';
    my $cluster_tps_s =
      defined $tps_cluster ? sprintf('%.0f', $tps_cluster) : 'n/a';
    my $round_count = scalar(@taxes);

    my $summary = sprintf(
        "MG-B single-node write tax (median of %d rounds) = %s%% "
          . "(gate <= %s%%; native=%s cluster=%s median tps)",
        $round_count, $tax_s, $GATE_PCT, $native_tps_s, $cluster_tps_s);
    diag($summary);
}
187+
152188
ok($have_both,
153189
"M0 native + cluster single-node throughput measured over "
154190
. scalar(@taxes) . " interleaved rounds");
@@ -181,6 +217,94 @@ sub median
181217
ok($wait_events_present > 0,
182218
"M2 cluster write-path wait-event surface present ($wait_events_present rows)");
183219

220+
# ---------------------------------------------------------------------
# M3: two-node peer-online write tax — REPORT ONLY.
#
# This is deliberately NOT a hard gate. It measures the current two-node
# online shape that Stage 5 can soundly run: strict-quorum ClusterPair with
# shared_data, node0 executing TPC-B writes while node1 is connected and in
# quorum. True concurrent dual-writer shared-block competition remains the
# separate DRM/Stage-6 limitation recorded below.
#
# Flow: boot the pair, wait for both peers to report 'connected' and both
# nodes to report in_quorum, run $TWO_NODE_ROUNDS pgbench rounds on node0,
# stop the pair, then report the median tps and the tax relative to the
# native single-node median ($tps_native) measured earlier in this script.
#
# Teardown: the pair handle lives outside the eval so that a failure after
# start_pair (peer wait, quorum poll, pgbench) still stops the pair instead
# of leaving its nodes running for the remainder of the script.
# ---------------------------------------------------------------------
my @two_node_tps;
my $two_node_started = 0;
my $two_node_ready = 0;
my $m3_pair;
my $m3_pair_stopped = 0;
eval {
    # Strip trailing newlines from the shared perf knobs before handing them
    # to new_pair as individual extra_conf entries.
    my @pair_perf_conf = map { my $line = $_; chomp $line; $line } @perf_conf;
    $m3_pair = PostgreSQL::Test::ClusterPair->new_pair(
        'mnw_pair',
        quorum_voting_disks => 3,
        shared_data => 1,
        extra_conf => [
            @pair_perf_conf,
            'cluster.quorum_poll_interval_ms = 500',
            'cluster.cssd_heartbeat_interval_ms = 2000',
            'cluster.cssd_dead_deadband_factor = 10',
        ]);
    $m3_pair->start_pair;
    $two_node_started = 1;
    # Ready only when both directions are connected AND both nodes are in
    # quorum; && short-circuits, so later waits are skipped after a failure.
    $two_node_ready =
         $m3_pair->wait_for_peer_state(0, 1, 'connected', 30)
      && $m3_pair->wait_for_peer_state(1, 0, 'connected', 30)
      && poll_sql_eq($m3_pair->node0, 'SELECT in_quorum FROM pg_cluster_quorum_state', 't', 20)
      && poll_sql_eq($m3_pair->node1, 'SELECT in_quorum FROM pg_cluster_quorum_state', 't', 20);

    if ($two_node_ready && pgbench_init($m3_pair->node0))
    {
        for my $r (1 .. $TWO_NODE_ROUNDS)
        {
            my $t = pgbench_one($m3_pair->node0);
            # Rounds that produced no usable tps are dropped, not counted as 0.
            next unless defined $t && $t > 0;
            push @two_node_tps, $t;
            note(sprintf(" two-node round %d: node0 tps=%.0f", $r, $t));
        }
    }
    $m3_pair->stop_pair;
    $m3_pair_stopped = 1;
    1;
} or do {
    my $err = $@ || 'unknown error';
    diag("M3 two-node report-only measurement failed before completion: $err");
};

# Best-effort teardown on the failure path.  Never fatal: this leg is report
# only, so a teardown problem is logged and the script carries on.
if (defined $m3_pair && !$m3_pair_stopped)
{
    eval { $m3_pair->stop_pair; 1; }
      or diag("M3 two-node pair teardown after failure also failed: "
          . ($@ || 'unknown error'));
}

# A tax value exists only if the pair booted, reached quorum, produced at
# least one round, and there is a positive native baseline to divide by.
my $two_have = ($two_node_started && $two_node_ready && scalar(@two_node_tps) > 0
      && defined $tps_native && $tps_native > 0);
my $two_tps = scalar(@two_node_tps) ? median(@two_node_tps) : undef;
my $two_tax = ($two_have && defined $two_tps)
  ? 100.0 * (1.0 - $two_tps / $tps_native) : undef;
my $two_tax_s = defined $two_tax ? sprintf('%.2f', $two_tax) : 'n/a';

note("MG-B two-node peer-online write-path REPORT-ONLY measurement:");
note(" native single-node TPC-B median tps = "
      . (defined $tps_native ? sprintf('%.0f', $tps_native) : 'n/a'));
note(" two-node peer-online node0 TPC-B median tps = "
      . (defined $two_tps ? sprintf('%.0f', $two_tps) : 'n/a'));
note(" two-node write tax % (report-only) = $two_tax_s");

# REPORT ONLY: this leg must never fail the single-node hard gate. If the
# 2-node ClusterPair could not boot / reach quorum / produce a number this run
# (transient runner shmem pressure, etc.), pass with an explicit unavailable
# note rather than failing -- the HARD gate is the single-node M1 tax only.
if ($two_have)
{
    diag("MG-B two-node peer-online write tax (report-only) = ${two_tax_s}%");
    ok(1,
        "M3 two-node peer-online single-writer write tax measured: ${two_tax_s}% "
          . "(REPORT ONLY; no threshold asserted)");
}
else
{
    ok(1,
        "M3 two-node peer-online write tax unavailable this run "
          . "(REPORT ONLY; never fails the single-node hard gate)");
}
$report->record_multinode_write_value(2, 'tpcb-peer-online-single-writer',
    tps_native => (defined $tps_native ? $tps_native : 0),
    tps_cluster => (defined $two_tps ? $two_tps : 0),
    write_tax_pct => $two_tax_s,
    gate => 'REPORT-ONLY',
    note => 'ClusterPair strict-quorum + shared_data; node0 writes while node1 '
      . 'is connected/in quorum. No threshold asserted.');
307+
184308
# ---------------------------------------------------------------------
185309
# SOUNDNESS — the single-node tax above is REAL + gated. The TRUE concurrent
186310
# multi-node shared-block write limit is a SEPARATE capability limitation that

0 commit comments

Comments
 (0)