Skip to content

Commit ac20be7

Browse files
committed
MDEV-21423 - lock-free trx_sys gets a performance regression caused by lf_find and ut_delay
TBD
1 parent c8bfb4d commit ac20be7

9 files changed

Lines changed: 140 additions & 67 deletions

File tree

storage/innobase/include/trx0purge.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ Remove the undo log segment from the rseg slot if it is too big for reuse.
3939
@param[in,out] undo undo log
4040
@param[in,out] mtr mini-transaction */
4141
void
42-
trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr);
42+
trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr,
43+
trx_id_t end);
4344

4445
/**
4546
Remove unnecessary history data from rollback segments. NOTE that when this

storage/innobase/include/trx0sys.h

Lines changed: 120 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -340,14 +340,6 @@ struct rw_trx_hash_element_t
340340

341341

342342
trx_id_t id; /* lf_hash_init() relies on this to be first in the struct */
343-
344-
/**
345-
Transaction serialization number.
346-
347-
Assigned shortly before the transaction is moved to COMMITTED_IN_MEMORY
348-
state. Initially set to TRX_ID_MAX.
349-
*/
350-
Atomic_counter<trx_id_t> no;
351343
trx_t *trx;
352344
srw_mutex mutex;
353345
};
@@ -443,7 +435,6 @@ class rw_trx_hash_t
443435
ut_ad(element->trx == 0);
444436
element->trx= trx;
445437
element->id= trx->id;
446-
element->no= TRX_ID_MAX;
447438
trx->rw_trx_hash_element= element;
448439
}
449440

@@ -512,7 +503,6 @@ class rw_trx_hash_t
512503
if (element->trx)
513504
validate_element(element->trx);
514505
element->mutex.wr_unlock();
515-
ut_ad(element->id < element->no);
516506
return arg->action(element, arg->argument);
517507
}
518508
#endif
@@ -849,6 +839,107 @@ class thread_safe_trx_ilist_t
849839
alignas(CPU_LEVEL1_DCACHE_LINESIZE) ilist<trx_t> trx_list;
850840
};
851841

842+
class rw_trx_vector
843+
{
844+
struct rw_trx_id
845+
{
846+
Atomic_relaxed<trx_id_t> id{TRX_ID_MAX};
847+
Atomic_relaxed<trx_id_t> no{TRX_ID_MAX};
848+
trx_t *trx;
849+
rw_trx_id(trx_t *t): trx(t) {}
850+
};
851+
std::vector<rw_trx_id, ut_allocator<rw_trx_id>>
852+
ids{ut_allocator<rw_trx_id>(mem_key_trx_sys_t_rw_trx_ids)};
853+
mutable srw_spin_lock_low latch;
854+
855+
public:
856+
void assign_new_trx_no(const trx_t *trx, trx_id_t no) noexcept
857+
{
858+
latch.rd_lock();
859+
ut_ad(trx->rw_trx_ids_slot < ids.size());
860+
ut_ad(ids[trx->rw_trx_ids_slot].trx == trx);
861+
ut_ad(ids[trx->rw_trx_ids_slot].id == trx->id);
862+
ut_ad(ids[trx->rw_trx_ids_slot].no == TRX_ID_MAX);
863+
ids[trx->rw_trx_ids_slot].no= no;
864+
latch.rd_unlock();
865+
}
866+
trx_id_t snapshot_ids(trx_ids_t &view_ids,
867+
const trx_id_t max_trx_id) const noexcept
868+
{
869+
trx_id_t min_trx_no{max_trx_id};
870+
view_ids.clear();
871+
latch.rd_lock();
872+
view_ids.reserve(ids.size());
873+
for (const auto &it : ids)
874+
{
875+
trx_id_t id{it.id};
876+
if (id < max_trx_id)
877+
{
878+
view_ids.push_back(id);
879+
const trx_id_t no{it.no};
880+
if (no < min_trx_no)
881+
min_trx_no= no;
882+
}
883+
}
884+
latch.rd_unlock();
885+
return min_trx_no;
886+
}
887+
void register_rw(const trx_t *trx) noexcept
888+
{
889+
latch.rd_lock();
890+
ut_ad(trx->rw_trx_ids_slot < ids.size());
891+
ut_ad(ids[trx->rw_trx_ids_slot].trx == trx);
892+
ut_ad(ids[trx->rw_trx_ids_slot].id == TRX_ID_MAX);
893+
ut_ad(ids[trx->rw_trx_ids_slot].no == TRX_ID_MAX);
894+
ids[trx->rw_trx_ids_slot].id= trx->id;
895+
latch.rd_unlock();
896+
}
897+
void deregister_rw(const trx_t *trx) noexcept
898+
{
899+
latch.rd_lock();
900+
ut_ad(trx->rw_trx_ids_slot < ids.size());
901+
rw_trx_id &slot= ids[trx->rw_trx_ids_slot];
902+
ut_ad(slot.trx == trx);
903+
ut_ad(slot.id == trx->id);
904+
slot.id= TRX_ID_MAX;
905+
slot.no= TRX_ID_MAX;
906+
latch.rd_unlock();
907+
}
908+
void register_trx(trx_t *trx) noexcept
909+
{
910+
ut_ad(trx->rw_trx_ids_slot == std::numeric_limits<uint32_t>::max());
911+
latch.wr_lock();
912+
trx->rw_trx_ids_slot= static_cast<uint32_t>(ids.size());
913+
ids.emplace_back(trx);
914+
latch.wr_unlock();
915+
}
916+
void deregister_trx(trx_t *trx) noexcept
917+
{
918+
latch.wr_lock();
919+
ut_ad(trx->rw_trx_ids_slot < ids.size());
920+
ut_ad(ids[trx->rw_trx_ids_slot].trx == trx);
921+
if (trx->rw_trx_ids_slot + 1 < ids.size())
922+
{
923+
trx_t *move_trx= ids.back().trx;
924+
ids[trx->rw_trx_ids_slot]= std::move(ids.back());
925+
move_trx->rw_trx_ids_slot= trx->rw_trx_ids_slot;
926+
}
927+
ids.pop_back();
928+
latch.wr_unlock();
929+
trx->rw_trx_ids_slot= std::numeric_limits<uint32_t>::max();
930+
}
931+
void create() noexcept
932+
{
933+
ut_ad(ids.size() == 0);
934+
latch.init();
935+
}
936+
void destroy() noexcept
937+
{
938+
ut_ad(ids.size() == 0);
939+
latch.destroy();
940+
}
941+
};
942+
852943
/** The transaction system central memory data structure. */
853944
class trx_sys_t
854945
{
@@ -876,6 +967,8 @@ class trx_sys_t
876967
/** False if there is no undo log to purge or rollback */
877968
bool undo_log_nonempty;
878969
public:
970+
rw_trx_vector rw_trx_ids;
971+
879972
/** List of all transactions. */
880973
thread_safe_trx_ilist_t trx_list;
881974

@@ -1014,7 +1107,7 @@ class trx_sys_t
10141107
next call to trx_sys.get_new_trx_id()
10151108
*/
10161109

1017-
trx_id_t get_max_trx_id()
1110+
trx_id_t get_max_trx_id() const noexcept
10181111
{
10191112
return m_max_trx_id;
10201113
}
@@ -1055,19 +1148,18 @@ class trx_sys_t
10551148
10561149
@param trx transaction
10571150
*/
1058-
void assign_new_trx_no(trx_t *trx)
1151+
trx_id_t assign_new_trx_no(trx_t *trx)
10591152
{
1060-
trx->rw_trx_hash_element->no= get_new_trx_id_no_refresh();
1153+
trx_id_t no= get_new_trx_id_no_refresh();
1154+
rw_trx_ids.assign_new_trx_no(trx, no);
10611155
refresh_rw_trx_hash_version();
1156+
return no;
10621157
}
10631158

10641159

10651160
/**
10661161
Takes MVCC snapshot.
10671162
1068-
To reduce malloc probability we reserve rw_trx_hash.size() + 32 elements
1069-
in ids.
1070-
10711163
For details about get_rw_trx_hash_version() != get_max_trx_id() spin
10721164
@sa register_rw() and @sa assign_new_trx_no().
10731165
@@ -1078,27 +1170,18 @@ class trx_sys_t
10781170
of rw_trx_hash.iterate_no_dups(). It means that some transaction
10791171
identifiers may appear multiple times in ids.
10801172
1081-
@param[in,out] caller_trx used to get access to rw_trx_hash_pins
10821173
@param[out] ids array to store registered transaction identifiers
10831174
@param[out] max_trx_id variable to store m_max_trx_id value
1084-
@param[out] mix_trx_no variable to store min(no) value
1175+
1176+
@return min(no)
10851177
*/
10861178

1087-
void snapshot_ids(trx_t *caller_trx, trx_ids_t *ids, trx_id_t *max_trx_id,
1088-
trx_id_t *min_trx_no)
1179+
trx_id_t snapshot_ids(trx_ids_t &ids, trx_id_t &max_trx_id) const noexcept
10891180
{
1090-
snapshot_ids_arg arg(ids);
1091-
1092-
while ((arg.m_id= get_rw_trx_hash_version()) != get_max_trx_id())
1181+
while ((max_trx_id= get_rw_trx_hash_version()) != get_max_trx_id())
10931182
ut_delay(1);
1094-
arg.m_no= arg.m_id;
1095-
1096-
ids->clear();
1097-
ids->reserve(rw_trx_hash.size() + 32);
1098-
rw_trx_hash.iterate(caller_trx, copy_one_id, &arg);
10991183

1100-
*max_trx_id= arg.m_id;
1101-
*min_trx_no= arg.m_no;
1184+
return rw_trx_ids.snapshot_ids(ids, max_trx_id);
11021185
}
11031186

11041187

@@ -1166,8 +1249,9 @@ class trx_sys_t
11661249
void register_rw(trx_t *trx)
11671250
{
11681251
trx->id= get_new_trx_id_no_refresh();
1169-
rw_trx_hash.insert(trx);
1252+
rw_trx_ids.register_rw(trx);
11701253
refresh_rw_trx_hash_version();
1254+
rw_trx_hash.insert(trx);
11711255
}
11721256

11731257

@@ -1178,8 +1262,9 @@ class trx_sys_t
11781262
MVCC snapshot won't see this transaction anymore.
11791263
*/
11801264

1181-
void deregister_rw(trx_t *trx)
1265+
void deregister_rw(trx_t *trx) noexcept
11821266
{
1267+
rw_trx_ids.deregister_rw(trx);
11831268
rw_trx_hash.erase(trx);
11841269
}
11851270

@@ -1204,6 +1289,7 @@ class trx_sys_t
12041289
void register_trx(trx_t *trx)
12051290
{
12061291
trx_list.push_front(*trx);
1292+
rw_trx_ids.register_trx(trx);
12071293
}
12081294

12091295

@@ -1214,6 +1300,7 @@ class trx_sys_t
12141300
*/
12151301
void deregister_trx(trx_t *trx)
12161302
{
1303+
rw_trx_ids.deregister_trx(trx);
12171304
trx_list.remove(*trx);
12181305
}
12191306

@@ -1266,33 +1353,8 @@ class trx_sys_t
12661353
private:
12671354
static my_bool find_same_or_older_callback(void *el, void *i) noexcept;
12681355

1269-
1270-
struct snapshot_ids_arg
1271-
{
1272-
snapshot_ids_arg(trx_ids_t *ids): m_ids(ids) {}
1273-
trx_ids_t *m_ids;
1274-
trx_id_t m_id;
1275-
trx_id_t m_no;
1276-
};
1277-
1278-
1279-
static my_bool copy_one_id(void* el, void *a)
1280-
{
1281-
auto element= static_cast<const rw_trx_hash_element_t *>(el);
1282-
auto arg= static_cast<snapshot_ids_arg*>(a);
1283-
if (element->id < arg->m_id)
1284-
{
1285-
trx_id_t no= element->no;
1286-
arg->m_ids->push_back(element->id);
1287-
if (no < arg->m_no)
1288-
arg->m_no= no;
1289-
}
1290-
return 0;
1291-
}
1292-
1293-
12941356
/** Getter for m_rw_trx_hash_version, must issue ACQUIRE memory barrier. */
1295-
trx_id_t get_rw_trx_hash_version()
1357+
trx_id_t get_rw_trx_hash_version() const noexcept
12961358
{
12971359
return m_rw_trx_hash_version.load(std::memory_order_acquire);
12981360
}

storage/innobase/include/trx0trx.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,8 @@ struct trx_t : ilist_node<>
629629

630630

631631
public:
632+
/** trx_sys.rw_trx_ids index, protected by mutex */
633+
uint32_t rw_trx_ids_slot;
632634
/** Transaction identifier (0 if no locks were acquired).
633635
Set by trx_sys_t::register_rw() or trx_resurrect() before
634636
the transaction is added to trx_sys.rw_trx_hash.

storage/innobase/include/ut0new.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ extern PSI_memory_key mem_key_other;
174174
extern PSI_memory_key mem_key_row_log_buf;
175175
extern PSI_memory_key mem_key_row_merge_sort;
176176
extern PSI_memory_key mem_key_std;
177+
extern PSI_memory_key mem_key_trx_sys_t_rw_trx_ids;
177178

178179
/** Setup the internal objects needed for UT_NEW() to operate.
179180
This must be called before the first call to UT_NEW(). */

storage/innobase/read/read0read.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ For details see: row_undo_mod_sec_is_unsafe() and row_purge_poss_sec()
172172
*/
173173
inline void ReadViewBase::snapshot(trx_t *trx)
174174
{
175-
trx_sys.snapshot_ids(trx, &m_ids, &m_low_limit_id, &m_low_limit_no);
175+
m_low_limit_no= trx_sys.snapshot_ids(m_ids, m_low_limit_id);
176176
if (m_ids.empty())
177177
{
178178
m_up_limit_id= m_low_limit_id;

storage/innobase/trx/trx0purge.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,11 @@ Remove the undo log segment from the rseg slot if it is too big for reuse.
153153
@param[in,out] undo undo log
154154
@param[in,out] mtr mini-transaction */
155155
void
156-
trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
156+
trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr,
157+
trx_id_t end)
157158
{
158159
DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")",
159-
trx->id, trx_id_t{trx->rw_trx_hash_element->no}));
160+
trx->id, end));
160161
ut_ad(undo->id < TRX_RSEG_N_SLOTS);
161162
ut_ad(undo == trx->rsegs.m_redo.undo);
162163
trx_rseg_t *rseg= trx->rsegs.m_redo.rseg;
@@ -257,7 +258,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
257258
mtr->write<2>(*undo_page, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE +
258259
undo_page->page.frame, undo_state);
259260
mtr->write<8,mtr_t::MAYBE_NOP>(*undo_page, undo_header + TRX_UNDO_TRX_NO,
260-
trx->rw_trx_hash_element->no);
261+
end);
261262
}
262263

263264
/** Free an undo log segment.

storage/innobase/trx/trx0sys.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ void trx_sys_t::create()
168168
m_initialised= true;
169169
trx_list.create();
170170
rw_trx_hash.init();
171+
rw_trx_ids.create();
171172
for (auto &rseg : temp_rsegs)
172173
rseg.init(nullptr, FIL_NULL);
173174
for (auto &rseg : rseg_array)
@@ -361,6 +362,7 @@ trx_sys_t::close()
361362
}
362363

363364
rw_trx_hash.destroy();
365+
rw_trx_ids.destroy();
364366

365367
/* There can't be any active transactions. */
366368
for (auto& rseg : temp_rsegs) rseg.destroy();

0 commit comments

Comments
 (0)