Skip to content

Commit b66db0f

Browse files
committed
MDEV-32115: Log checkpoint race with wsrep_sst_method=rsync
Galera snapshot transfer (SST) using the default wsrep_sst_method=rsync is prone to creating corrupted snapshots. The probability for this is rather low and might only affect installations that include ENGINE=InnoDB tables that contain FULLTEXT INDEX.

The function sst_disable_innodb_writes() aims to disable all InnoDB writes during the time a snapshot transfer (SST) is in progress using the default wsrep_sst_method=rsync. The logic based on invoking log_make_checkpoint() almost works, except for two things: We failed to ensure that fts_optimize_callback() has stopped executing, and we did not block updates of the log checkpoint header.

log_checkpoint_low(): Assert that writes to the log are allowed.

buf_flush_page_cleaner(): Do not try to advance the checkpoint while wsrep_sst_method=rsync is in progress. This prevents the assertion in log_checkpoint_low() from failing.

fts_optimize_pause(), fts_optimize_resume(): Pause and resume the fts_optimize_callback().

sst_disable_innodb_writes(): Disable all background writers before initiating the log checkpoint.

fts_optimize_callback(): Assert that wsrep_sst_method=rsync is not active, and remove the previous incorrect attempt at fixing this race.
1 parent 3d2ae78 commit b66db0f

4 files changed

Lines changed: 63 additions & 45 deletions

File tree

storage/innobase/buf/buf0flu.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,6 +1966,7 @@ inline void log_t::write_checkpoint(lsn_t checkpoint, lsn_t end_lsn) noexcept
19661966
static void log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept
19671967
{
19681968
ut_ad(!srv_read_only_mode);
1969+
ut_ad(!recv_no_log_write);
19691970
ut_ad(log_sys.latch_have_wr());
19701971
ut_ad(oldest_lsn <= end_lsn);
19711972
ut_ad(end_lsn == log_sys.get_lsn());
@@ -2550,6 +2551,11 @@ static void buf_flush_page_cleaner() noexcept
25502551
{
25512552
if (recv_recovery_is_on())
25522553
continue;
2554+
#ifdef WITH_WSREP
2555+
extern Atomic_relaxed<bool> wsrep_sst_disable_writes;
2556+
if (UNIV_UNLIKELY(wsrep_sst_disable_writes))
2557+
continue; /* See sst_disable_innodb_writes() */
2558+
#endif
25532559
IF_DBUG(if (log_sys.last_checkpoint_lsn &&
25542560
srv_shutdown_state < SRV_SHUTDOWN_CLEANUP &&
25552561
(_db_keyword_(nullptr, "ib_log_checkpoint_avoid", 1) ||

storage/innobase/fts/fts0opt.cc

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,6 @@ Completed 2011/7/10 Sunny and Jimmy Yang
3737
#include "zlib.h"
3838
#include "fts0opt.h"
3939
#include "fts0vlc.h"
40-
#include "wsrep.h"
41-
42-
#ifdef WITH_WSREP
43-
extern Atomic_relaxed<bool> wsrep_sst_disable_writes;
44-
#else
45-
constexpr bool wsrep_sst_disable_writes= false;
46-
#endif
4740

4841
/** The FTS optimize thread's work queue. */
4942
ib_wqueue_t* fts_optimize_wq;
@@ -52,7 +45,7 @@ static void timer_callback(void*);
5245
static tpool::timer* timer;
5346

5447
static tpool::task_group task_group(1);
55-
static tpool::task task(fts_optimize_callback,0, &task_group);
48+
static tpool::waitable_task task(fts_optimize_callback,0, &task_group);
5649

5750
/** FTS optimize thread, for MDL acquisition */
5851
static THD *fts_opt_thd;
@@ -230,7 +223,7 @@ ulong fts_num_word_optimize;
230223
char fts_enable_diag_print;
231224

232225
/** ZLib compressed block size.*/
233-
static ulint FTS_ZIP_BLOCK_SIZE = 1024;
226+
static constexpr ulint FTS_ZIP_BLOCK_SIZE = 1024;
234227

235228
/** The amount of time optimizing in a single pass, in seconds. */
236229
static ulint fts_optimize_time_limit;
@@ -2831,6 +2824,10 @@ static void fts_optimize_callback(void *)
28312824
static ulint n_tables = ib_vector_size(fts_slots);
28322825

28332826
while (!done && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
2827+
#ifdef WITH_WSREP
2828+
ut_d(extern Atomic_relaxed<bool> wsrep_sst_disable_writes);
2829+
ut_ad(!wsrep_sst_disable_writes);
2830+
#endif
28342831
/* If there is no message in the queue and we have tables
28352832
to optimize then optimize the tables. */
28362833

@@ -2841,17 +2838,6 @@ static void fts_optimize_callback(void *)
28412838

28422839
/* The queue is empty but we have tables
28432840
to optimize. */
2844-
if (UNIV_UNLIKELY(wsrep_sst_disable_writes)) {
2845-
retry_later:
2846-
if (fts_is_sync_needed()) {
2847-
fts_need_sync = true;
2848-
}
2849-
if (n_tables) {
2850-
timer->set_time(5000, 0);
2851-
}
2852-
return;
2853-
}
2854-
28552841
fts_slot_t* slot = static_cast<fts_slot_t*>(
28562842
ib_vector_get(fts_slots, current));
28572843

@@ -2872,7 +2858,13 @@ static void fts_optimize_callback(void *)
28722858
(ib_wqueue_nowait(fts_optimize_wq));
28732859
/* Timeout ? */
28742860
if (!msg) {
2875-
goto retry_later;
2861+
if (fts_is_sync_needed()) {
2862+
fts_need_sync = true;
2863+
}
2864+
if (n_tables) {
2865+
timer->set_time(5000, 0);
2866+
}
2867+
return;
28762868
}
28772869

28782870
switch (msg->type) {
@@ -2898,11 +2890,6 @@ static void fts_optimize_callback(void *)
28982890
break;
28992891

29002892
case FTS_MSG_SYNC_TABLE:
2901-
if (UNIV_UNLIKELY(wsrep_sst_disable_writes)) {
2902-
add_msg(msg);
2903-
goto retry_later;
2904-
}
2905-
29062893
DBUG_EXECUTE_IF(
29072894
"fts_instrument_msg_sync_sleep",
29082895
std::this_thread::sleep_for(
@@ -2945,11 +2932,8 @@ static void fts_optimize_callback(void *)
29452932
ib::info() << "FTS optimize thread exiting.";
29462933
}
29472934

2948-
/**********************************************************************//**
2949-
Startup the optimize thread and create the work queue. */
2950-
void
2951-
fts_optimize_init(void)
2952-
/*===================*/
2935+
/** Startup the optimize task and create the work queue. */
2936+
void fts_optimize_init()
29532937
{
29542938
mem_heap_t* heap;
29552939
ib_alloc_t* heap_alloc;
@@ -2993,9 +2977,8 @@ fts_optimize_init(void)
29932977
last_check_sync_time = time(NULL);
29942978
}
29952979

2996-
/** Shutdown fts optimize thread. */
2997-
void
2998-
fts_optimize_shutdown()
2980+
/** Shut down the fts optimize thread. */
2981+
void fts_optimize_shutdown()
29992982
{
30002983
ut_ad(!srv_read_only_mode);
30012984

@@ -3004,7 +2987,7 @@ fts_optimize_shutdown()
30042987
dict_sys.freeze(SRW_LOCK_CALL);
30052988
mysql_mutex_lock(&fts_optimize_wq->mutex);
30062989
/* Tells FTS optimizer system that we are exiting from
3007-
optimizer thread, message send their after will not be
2990+
optimizer thread, messages sent thereafter will not be
30082991
processed */
30092992
fts_opt_start_shutdown = true;
30102993
dict_sys.unfreeze();
@@ -3034,6 +3017,26 @@ fts_optimize_shutdown()
30343017
timer = NULL;
30353018
}
30363019

3020+
#ifdef WITH_WSREP
3021+
/** Pause the optimize subsystem. */
3022+
void fts_optimize_pause()
3023+
{
3024+
ut_ad(!srv_read_only_mode);
3025+
/* Prevent fts_optimize_callback() from being scheduled. */
3026+
timer->disarm();
3027+
/* Wait for any current fts_optimize_callback() to finish. */
3028+
task.wait();
3029+
}
3030+
3031+
/** Resume after fts_optimize_pause() */
3032+
void fts_optimize_resume()
3033+
{
3034+
/* Schedule fts_optimize_callback() immediately.
3035+
It will reschedule itself via the timer when needed. */
3036+
srv_thread_pool->submit_task(&task);
3037+
}
3038+
#endif
3039+
30373040
/** Sync the table during commit phase
30383041
@param[in] table table to be synced */
30393042
void fts_sync_during_ddl(dict_table_t* table)

storage/innobase/handler/ha_innodb.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1779,9 +1779,10 @@ static void sst_disable_innodb_writes()
17791779
fil_crypt_set_thread_cnt(0);
17801780
srv_n_fil_crypt_threads= old_count;
17811781

1782-
wsrep_sst_disable_writes= true;
17831782
dict_stats_shutdown();
1783+
fts_optimize_pause();
17841784
purge_sys.stop();
1785+
17851786
/* We are holding a global MDL thanks to FLUSH TABLES WITH READ LOCK.
17861787

17871788
That will prevent any writes from arriving into InnoDB, but it will
@@ -1793,10 +1794,12 @@ static void sst_disable_innodb_writes()
17931794
possible during the snapshot, and to guarantee that no crash
17941795
recovery will be necessary when starting up on the snapshot. */
17951796
log_make_checkpoint();
1797+
wsrep_sst_disable_writes= true;
17961798
/* If any FILE_MODIFY records were written by the checkpoint, an
17971799
extra write of a FILE_CHECKPOINT record could still be invoked by
1798-
buf_flush_page_cleaner(). Let us prevent that by invoking another
1799-
checkpoint (which will write the FILE_CHECKPOINT record). */
1800+
buf_flush_page_cleaner(). Let us ensure that the page cleaner
1801+
is idle and will observe our above assignment (not write anything
1802+
further to the log). */
18001803
log_make_checkpoint();
18011804
ut_d(recv_no_log_write= true);
18021805
/* If this were not a no-op, an assertion would fail due to
@@ -1811,6 +1814,8 @@ static void sst_enable_innodb_writes()
18111814
dict_stats_start();
18121815
purge_sys.resume();
18131816
wsrep_sst_disable_writes= false;
1817+
/* Allow fts_optimize_callback() to assert that the flag is clear. */
1818+
fts_optimize_resume();
18141819
const uint old_count= srv_n_fil_crypt_threads;
18151820
srv_n_fil_crypt_threads= 0;
18161821
fil_crypt_set_thread_cnt(old_count);

storage/innobase/include/fts0fts.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -624,11 +624,8 @@ fts_optimize_table(
624624
/*===============*/
625625
dict_table_t* table); /*!< in: table to optimize */
626626

627-
/**********************************************************************//**
628-
Startup the optimize thread and create the work queue. */
629-
void
630-
fts_optimize_init(void);
631-
/*====================*/
627+
/** Startup the optimize task and create the work queue. */
628+
void fts_optimize_init();
632629

633630
/****************************************************************//**
634631
Drops index ancillary tables for a FTS index
@@ -651,8 +648,15 @@ fts_optimize_remove_table(
651648
dict_table_t* table); /*!< in: table to remove */
652649

653650
/** Shutdown fts optimize thread. */
654-
void
655-
fts_optimize_shutdown();
651+
void fts_optimize_shutdown();
652+
653+
#ifdef WITH_WSREP
654+
/** Pause the optimize subsystem. */
655+
void fts_optimize_pause();
656+
657+
/** Resume after fts_optimize_pause() */
658+
void fts_optimize_resume();
659+
#endif
656660

657661
/** Send sync fts cache for the table.
658662
@param[in] table table to sync */

0 commit comments

Comments
 (0)