Skip to content

Commit 3c435b8

Browse files
sjaakolajanlindstrom
authored and committed
MDEV-38260 applier hang due to sequence table access
Avoid marking a sequence table write as a DDL transaction during Galera applying. Skip the commit for a DDL-marked GTID log event in applying, as this would lead to a double commit if the transaction also has a real transaction. Such a scenario could happen if a transaction has sequence table writes together with other InnoDB table writes. The commit contains a new mtr test, galera.MDEV-38260, for testing the applying of a transaction having sequence table access when using GTID mode.
1 parent e55dc52 commit 3c435b8

6 files changed

Lines changed: 179 additions & 7 deletions

File tree

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
connection node_2;
2+
connection node_1;
3+
connection node_1;
4+
SET global wsrep_slave_threads = 2;
5+
connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3;
6+
connection node_2;
7+
START SLAVE;
8+
connection node_3;
9+
CREATE SEQUENCE my_seq INCREMENT BY 1 NOCACHE ENGINE=INNODB;
10+
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
11+
insert into t1 values (0);
12+
BEGIN;
13+
select nextval(my_seq);
14+
nextval(my_seq)
15+
1
16+
select nextval(my_seq);
17+
nextval(my_seq)
18+
2
19+
select nextval(my_seq);
20+
nextval(my_seq)
21+
3
22+
insert into t1 values (1);
23+
COMMIT;
24+
insert into t1 values (2);
25+
delete from t1;
26+
DROP TABLE t1;
27+
CREATE TABLE t1(a int not null primary key default nextval(my_seq), b int) engine=innodb;
28+
BEGIN;
29+
INSERT INTO t1(b) VALUES(3);
30+
INSERT INTO t1(a,b) VALUES(10,4);
31+
COMMIT;
32+
delete from t1;
33+
connection node_1;
34+
SET global wsrep_slave_threads = DEFAULT;
35+
connection node_3;
36+
DROP SEQUENCE my_seq;
37+
DROP TABLE t1;
38+
connection node_1;
39+
connection node_2;
40+
STOP SLAVE;
41+
RESET SLAVE ALL;
42+
connection node_1;
43+
set global wsrep_on=OFF;
44+
reset master;
45+
set global wsrep_on=ON;
46+
connection node_2;
47+
set global wsrep_on=OFF;
48+
reset master;
49+
set global wsrep_on=ON;
50+
connection node_3;
51+
reset master;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
!include ../galera_2nodes_as_slave.cnf
2+
3+
[mysqld]
4+
log-bin=mysqld-bin
5+
log-slave-updates
6+
binlog-format=ROW
7+
8+
gtid_strict_mode=ON
9+
gtid_domain_id=10
10+
11+
[mysqld.1]
12+
wsrep_gtid_mode=ON
13+
[mysqld.2]
14+
wsrep_gtid_mode=ON
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#
2+
# Test Galera as a slave to a MariaDB master using sequence table and GTID mode
3+
#
4+
# In the problematic execution scenario, sequence access marks the GTID event as DDL
5+
# and applier makes implicit commit before the actual transaction is even applied.
6+
# Second commit attempt after the actual payload applying would then wait for
7+
# commit order eternally, causing total cluster hang
8+
#
9+
#
10+
11+
--source include/have_innodb.inc
12+
--source include/have_log_bin.inc
13+
--source include/galera_cluster.inc
14+
15+
#
16+
# node_1 = galera cluster replica node
17+
# node_2 = galera cluster node, operating as replication slave for node_3
18+
# node_3 = regular mariadb server operating as replication master
19+
#
20+
21+
# enable parallel applying in galera cluster replica node
22+
--connection node_1
23+
SET global wsrep_slave_threads = 2;
24+
25+
# As node #3 is not a Galera node, and galera_cluster.inc does not open a connection to it,
26+
# we open the node_3 connection here
27+
--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3
28+
29+
--connection node_2
30+
--disable_query_log
31+
--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_3;
32+
--enable_query_log
33+
START SLAVE;
34+
35+
--connection node_3
36+
CREATE SEQUENCE my_seq INCREMENT BY 1 NOCACHE ENGINE=INNODB;
37+
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
38+
insert into t1 values (0);
39+
40+
BEGIN;
41+
# spend some sequence values
42+
select nextval(my_seq);
43+
select nextval(my_seq);
44+
select nextval(my_seq);
45+
46+
# regular insert, which may cause duplicate commit in node_1
47+
insert into t1 values (1);
48+
COMMIT;
49+
50+
# feed some more DML, which may cause hang in galera replication, node_1
51+
insert into t1 values (2);
52+
delete from t1;
53+
54+
#
55+
# Scenario where a sequence value is used in a SQL statement
56+
#
57+
DROP TABLE t1;
58+
CREATE TABLE t1(a int not null primary key default nextval(my_seq), b int) engine=innodb;
59+
60+
BEGIN;
61+
INSERT INTO t1(b) VALUES(3);
62+
INSERT INTO t1(a,b) VALUES(10,4);
63+
COMMIT;
64+
65+
delete from t1;
66+
67+
# tear down the replication, force galera nodes to apply all remaining as single threaded
68+
--connection node_1
69+
SET global wsrep_slave_threads = DEFAULT;
70+
71+
--connection node_3
72+
DROP SEQUENCE my_seq;
73+
DROP TABLE t1;
74+
75+
# wait until the replication channel is flushed and all is applied in node_1
76+
--connection node_1
77+
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'
78+
--source include/wait_condition.inc
79+
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'my_seq'
80+
--source include/wait_condition.inc
81+
82+
--connection node_2
83+
STOP SLAVE;
84+
RESET SLAVE ALL;
85+
86+
--connection node_1
87+
set global wsrep_on=OFF;
88+
reset master;
89+
set global wsrep_on=ON;
90+
91+
--connection node_2
92+
set global wsrep_on=OFF;
93+
reset master;
94+
set global wsrep_on=ON;
95+
96+
--connection node_3
97+
reset master;

sql/ha_sequence.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,12 @@ int ha_sequence::write_row(const uchar *buf)
251251
- Check that the new row is an accurate SEQUENCE object
252252
*/
253253
/* mark a full binlog image insert to force non-parallel slave */
254+
#ifdef WITH_WSREP
255+
if (WSREP_ON && WSREP(thd) && wsrep_thd_is_applying(thd))
256+
{
257+
WSREP_DEBUG("skipped to mark trx as DDL due to sequence table insert");
258+
} else
259+
#endif /* WITH_WSREP */
254260
thd->transaction->stmt.mark_trans_did_ddl();
255261
if (table->s->tmp_table == NO_TMP_TABLE &&
256262
thd->mdl_context.upgrade_shared_lock(table->mdl_ticket,

sql/wsrep_applier.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ int wsrep_apply_events(THD* thd,
238238
}
239239

240240
typ= ev->get_type_code();
241+
WSREP_DEBUG("applying event %d, type %d, buf_len %zu", event, typ, buf_len);
241242

242243
switch (typ) {
243244
case FORMAT_DESCRIPTION_EVENT:

sql/wsrep_schema.cc

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "log_event.h"
3535
#include "sql_class.h"
3636

37+
#include <mysql/psi/mysql_transaction.h>
3738
#include <string>
3839
#include <sstream>
3940

@@ -1663,13 +1664,15 @@ int Wsrep_schema::store_gtid_event(THD* thd,
16631664

16641665
if (in_ddl)
16651666
{
1666-
// Commit transaction if this GTID is part of DDL-clause because
1667-
// DDL causes implicit commit assuming there is no multi statement
1668-
// transaction ongoing.
1669-
if((error= trans_commit_stmt(thd)))
1670-
goto out;
1671-
1672-
(void)trans_commit(thd);
1667+
/* gtid slave state recording above has started a transaction and called for
1668+
statement commit. Resetting here the transaction state for the actual DDL
1669+
execution to happen by following events
1670+
*/
1671+
thd->transaction->cleanup();
1672+
MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
1673+
thd->m_transaction_psi= NULL;
1674+
thd->server_status&=
1675+
~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY);
16731676
}
16741677

16751678
out:

0 commit comments

Comments
 (0)