Skip to content

Commit 6d2f280

Browse files
committed
MDEV-21423 - lock-free trx_sys gets performance regression caused by lf_find and ut_delay
TBD
1 parent c8bfb4d commit 6d2f280

11 files changed

Lines changed: 145 additions & 67 deletions

File tree

mysql-test/suite/perfschema/r/sxlock_func.result

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ name
1010
wait/synch/rwlock/innodb/dict_operation_lock
1111
wait/synch/rwlock/innodb/fil_space_latch
1212
wait/synch/rwlock/innodb/lock_latch
13+
wait/synch/rwlock/innodb/rw_trx_ids_latch
1314
wait/synch/rwlock/innodb/trx_i_s_cache_lock
1415
wait/synch/rwlock/innodb/trx_purge_latch
1516
wait/synch/rwlock/innodb/trx_rseg_latch
@@ -43,6 +44,7 @@ ORDER BY event_name;
4344
event_name
4445
wait/synch/rwlock/innodb/fil_space_latch
4546
wait/synch/rwlock/innodb/lock_latch
47+
wait/synch/rwlock/innodb/rw_trx_ids_latch
4648
SELECT event_name FROM performance_schema.events_waits_history_long
4749
WHERE event_name = 'wait/synch/sxlock/innodb/index_tree_rw_lock'
4850
AND operation IN ('try_shared_lock','shared_lock') LIMIT 1;

storage/innobase/handler/ha_innodb.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,7 @@ mysql_pfs_key_t fil_space_latch_key;
583583
mysql_pfs_key_t trx_i_s_cache_lock_key;
584584
mysql_pfs_key_t trx_purge_latch_key;
585585
mysql_pfs_key_t trx_rseg_latch_key;
586+
mysql_pfs_key_t rw_trx_ids_latch_key;
586587
mysql_pfs_key_t lock_latch_key;
587588

588589
/* all_innodb_rwlocks array contains rwlocks that are
@@ -598,6 +599,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] =
598599
{ &trx_i_s_cache_lock_key, "trx_i_s_cache_lock", 0 },
599600
{ &trx_purge_latch_key, "trx_purge_latch", 0 },
600601
{ &trx_rseg_latch_key, "trx_rseg_latch", 0 },
602+
{ &rw_trx_ids_latch_key, "rw_trx_ids_latch", 0 },
601603
{ &lock_latch_key, "lock_latch", 0 },
602604
{ &index_tree_rw_lock_key, "index_tree_rw_lock", PSI_RWLOCK_FLAG_SX }
603605
};

storage/innobase/include/trx0sys.h

Lines changed: 120 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -340,14 +340,6 @@ struct rw_trx_hash_element_t
340340

341341

342342
trx_id_t id; /* lf_hash_init() relies on this to be first in the struct */
343-
344-
/**
345-
Transaction serialization number.
346-
347-
Assigned shortly before the transaction is moved to COMMITTED_IN_MEMORY
348-
state. Initially set to TRX_ID_MAX.
349-
*/
350-
Atomic_counter<trx_id_t> no;
351343
trx_t *trx;
352344
srw_mutex mutex;
353345
};
@@ -443,7 +435,6 @@ class rw_trx_hash_t
443435
ut_ad(element->trx == 0);
444436
element->trx= trx;
445437
element->id= trx->id;
446-
element->no= TRX_ID_MAX;
447438
trx->rw_trx_hash_element= element;
448439
}
449440

@@ -512,7 +503,6 @@ class rw_trx_hash_t
512503
if (element->trx)
513504
validate_element(element->trx);
514505
element->mutex.wr_unlock();
515-
ut_ad(element->id < element->no);
516506
return arg->action(element, arg->argument);
517507
}
518508
#endif
@@ -849,6 +839,108 @@ class thread_safe_trx_ilist_t
849839
alignas(CPU_LEVEL1_DCACHE_LINESIZE) ilist<trx_t> trx_list;
850840
};
851841

842+
class rw_trx_vector
843+
{
844+
struct rw_trx_id
845+
{
846+
Atomic_relaxed<trx_id_t> id{TRX_ID_MAX};
847+
Atomic_relaxed<trx_id_t> no{TRX_ID_MAX};
848+
trx_t *trx;
849+
rw_trx_id(trx_t *t): trx(t) {}
850+
};
851+
std::vector<rw_trx_id, ut_allocator<rw_trx_id>>
852+
ids{ut_allocator<rw_trx_id>(mem_key_trx_sys_t_rw_trx_ids)};
853+
mutable srw_spin_lock latch;
854+
855+
public:
856+
void assign_new_trx_no(const trx_t *trx) noexcept
857+
{
858+
ut_ad(trx->rw_trx_ids_slot != std::numeric_limits<uint32_t>::max());
859+
ut_ad(trx->id < trx->no);
860+
latch.rd_lock(SRW_LOCK_CALL);
861+
ut_ad(ids[trx->rw_trx_ids_slot].id == trx->id);
862+
ut_ad(ids[trx->rw_trx_ids_slot].no == TRX_ID_MAX);
863+
ids[trx->rw_trx_ids_slot].no= trx->no;
864+
latch.rd_unlock();
865+
}
866+
trx_id_t snapshot_ids(trx_ids_t &view_ids,
867+
const trx_id_t max_trx_id) const noexcept
868+
{
869+
trx_id_t min_trx_no{max_trx_id};
870+
view_ids.clear();
871+
latch.rd_lock(SRW_LOCK_CALL);
872+
view_ids.reserve(ids.size());
873+
for (const auto &it : ids)
874+
{
875+
trx_id_t id{it.id};
876+
if (id < max_trx_id)
877+
{
878+
view_ids.push_back(id);
879+
const trx_id_t no{it.no};
880+
if (no < min_trx_no)
881+
min_trx_no= no;
882+
}
883+
}
884+
latch.rd_unlock();
885+
return min_trx_no;
886+
}
887+
void register_rw(const trx_t *trx) noexcept
888+
{
889+
ut_ad(trx->rw_trx_ids_slot != std::numeric_limits<uint32_t>::max());
890+
ut_ad(trx->no == TRX_ID_MAX);
891+
latch.rd_lock(SRW_LOCK_CALL);
892+
ut_ad(ids[trx->rw_trx_ids_slot].id == TRX_ID_MAX);
893+
ut_ad(ids[trx->rw_trx_ids_slot].no == TRX_ID_MAX);
894+
ids[trx->rw_trx_ids_slot].id= trx->id;
895+
latch.rd_unlock();
896+
}
897+
void deregister_rw(const trx_t *trx) noexcept
898+
{
899+
ut_ad(trx->rw_trx_ids_slot != std::numeric_limits<uint32_t>::max());
900+
latch.rd_lock(SRW_LOCK_CALL);
901+
ut_ad(ids[trx->rw_trx_ids_slot].id != TRX_ID_MAX);
902+
ut_ad(ids[trx->rw_trx_ids_slot].id == trx->id);
903+
ut_ad(ids[trx->rw_trx_ids_slot].no == trx->no);
904+
ids[trx->rw_trx_ids_slot].id= TRX_ID_MAX;
905+
ids[trx->rw_trx_ids_slot].no= TRX_ID_MAX;
906+
latch.rd_unlock();
907+
}
908+
void register_trx(trx_t *trx) noexcept
909+
{
910+
ut_ad(trx->rw_trx_ids_slot == std::numeric_limits<uint32_t>::max());
911+
ut_ad(trx->no == TRX_ID_MAX);
912+
latch.wr_lock(SRW_LOCK_CALL);
913+
trx->rw_trx_ids_slot= static_cast<uint32_t>(ids.size());
914+
ids.emplace_back(trx);
915+
latch.wr_unlock();
916+
}
917+
void deregister_trx(trx_t *trx) noexcept
918+
{
919+
ut_ad(trx->rw_trx_ids_slot != std::numeric_limits<uint32_t>::max());
920+
ut_ad(trx->no == TRX_ID_MAX);
921+
latch.wr_lock(SRW_LOCK_CALL);
922+
if (trx->rw_trx_ids_slot + 1 < ids.size())
923+
{
924+
trx_t *move_trx= ids.back().trx;
925+
ids[trx->rw_trx_ids_slot]= std::move(ids.back());
926+
move_trx->rw_trx_ids_slot= trx->rw_trx_ids_slot;
927+
}
928+
ids.pop_back();
929+
latch.wr_unlock();
930+
trx->rw_trx_ids_slot= std::numeric_limits<uint32_t>::max();
931+
}
932+
void create() noexcept
933+
{
934+
ut_ad(ids.size() == 0);
935+
latch.SRW_LOCK_INIT(rw_trx_ids_latch_key);
936+
}
937+
void destroy() noexcept
938+
{
939+
ut_ad(ids.size() == 0);
940+
latch.destroy();
941+
}
942+
};
943+
852944
/** The transaction system central memory data structure. */
853945
class trx_sys_t
854946
{
@@ -876,6 +968,8 @@ class trx_sys_t
876968
/** False if there is no undo log to purge or rollback */
877969
bool undo_log_nonempty;
878970
public:
971+
rw_trx_vector rw_trx_ids;
972+
879973
/** List of all transactions. */
880974
thread_safe_trx_ilist_t trx_list;
881975

@@ -1014,7 +1108,7 @@ class trx_sys_t
10141108
next call to trx_sys.get_new_trx_id()
10151109
*/
10161110

1017-
trx_id_t get_max_trx_id()
1111+
trx_id_t get_max_trx_id() const noexcept
10181112
{
10191113
return m_max_trx_id;
10201114
}
@@ -1057,17 +1151,15 @@ class trx_sys_t
10571151
*/
10581152
void assign_new_trx_no(trx_t *trx)
10591153
{
1060-
trx->rw_trx_hash_element->no= get_new_trx_id_no_refresh();
1154+
trx->no= get_new_trx_id_no_refresh();
1155+
rw_trx_ids.assign_new_trx_no(trx);
10611156
refresh_rw_trx_hash_version();
10621157
}
10631158

10641159

10651160
/**
10661161
Takes MVCC snapshot.
10671162
1068-
To reduce malloc probability we reserve rw_trx_hash.size() + 32 elements
1069-
in ids.
1070-
10711163
For details about get_rw_trx_hash_version() != get_max_trx_id() spin
10721164
@sa register_rw() and @sa assign_new_trx_no().
10731165
@@ -1078,27 +1170,18 @@ class trx_sys_t
10781170
of rw_trx_hash.iterate_no_dups(). It means that some transaction
10791171
identifiers may appear multiple times in ids.
10801172
1081-
@param[in,out] caller_trx used to get access to rw_trx_hash_pins
10821173
@param[out] ids array to store registered transaction identifiers
10831174
@param[out] max_trx_id variable to store m_max_trx_id value
1084-
@param[out] mix_trx_no variable to store min(no) value
1175+
1176+
@return min(no)
10851177
*/
10861178

1087-
void snapshot_ids(trx_t *caller_trx, trx_ids_t *ids, trx_id_t *max_trx_id,
1088-
trx_id_t *min_trx_no)
1179+
trx_id_t snapshot_ids(trx_ids_t &ids, trx_id_t &max_trx_id) const noexcept
10891180
{
1090-
snapshot_ids_arg arg(ids);
1091-
1092-
while ((arg.m_id= get_rw_trx_hash_version()) != get_max_trx_id())
1181+
while ((max_trx_id= get_rw_trx_hash_version()) != get_max_trx_id())
10931182
ut_delay(1);
1094-
arg.m_no= arg.m_id;
1095-
1096-
ids->clear();
1097-
ids->reserve(rw_trx_hash.size() + 32);
1098-
rw_trx_hash.iterate(caller_trx, copy_one_id, &arg);
10991183

1100-
*max_trx_id= arg.m_id;
1101-
*min_trx_no= arg.m_no;
1184+
return rw_trx_ids.snapshot_ids(ids, max_trx_id);
11021185
}
11031186

11041187

@@ -1166,8 +1249,9 @@ class trx_sys_t
11661249
void register_rw(trx_t *trx)
11671250
{
11681251
trx->id= get_new_trx_id_no_refresh();
1169-
rw_trx_hash.insert(trx);
1252+
rw_trx_ids.register_rw(trx);
11701253
refresh_rw_trx_hash_version();
1254+
rw_trx_hash.insert(trx);
11711255
}
11721256

11731257

@@ -1178,9 +1262,11 @@ class trx_sys_t
11781262
MVCC snapshot won't see this transaction anymore.
11791263
*/
11801264

1181-
void deregister_rw(trx_t *trx)
1265+
void deregister_rw(trx_t *trx) noexcept
11821266
{
1267+
rw_trx_ids.deregister_rw(trx);
11831268
rw_trx_hash.erase(trx);
1269+
trx->no= TRX_ID_MAX;
11841270
}
11851271

11861272

@@ -1204,6 +1290,7 @@ class trx_sys_t
12041290
void register_trx(trx_t *trx)
12051291
{
12061292
trx_list.push_front(*trx);
1293+
rw_trx_ids.register_trx(trx);
12071294
}
12081295

12091296

@@ -1214,6 +1301,7 @@ class trx_sys_t
12141301
*/
12151302
void deregister_trx(trx_t *trx)
12161303
{
1304+
rw_trx_ids.deregister_trx(trx);
12171305
trx_list.remove(*trx);
12181306
}
12191307

@@ -1266,33 +1354,8 @@ class trx_sys_t
12661354
private:
12671355
static my_bool find_same_or_older_callback(void *el, void *i) noexcept;
12681356

1269-
1270-
struct snapshot_ids_arg
1271-
{
1272-
snapshot_ids_arg(trx_ids_t *ids): m_ids(ids) {}
1273-
trx_ids_t *m_ids;
1274-
trx_id_t m_id;
1275-
trx_id_t m_no;
1276-
};
1277-
1278-
1279-
static my_bool copy_one_id(void* el, void *a)
1280-
{
1281-
auto element= static_cast<const rw_trx_hash_element_t *>(el);
1282-
auto arg= static_cast<snapshot_ids_arg*>(a);
1283-
if (element->id < arg->m_id)
1284-
{
1285-
trx_id_t no= element->no;
1286-
arg->m_ids->push_back(element->id);
1287-
if (no < arg->m_no)
1288-
arg->m_no= no;
1289-
}
1290-
return 0;
1291-
}
1292-
1293-
12941357
/** Getter for m_rw_trx_hash_version, must issue ACQUIRE memory barrier. */
1295-
trx_id_t get_rw_trx_hash_version()
1358+
trx_id_t get_rw_trx_hash_version() const noexcept
12961359
{
12971360
return m_rw_trx_hash_version.load(std::memory_order_acquire);
12981361
}

storage/innobase/include/trx0trx.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -629,12 +629,15 @@ struct trx_t : ilist_node<>
629629

630630

631631
public:
632+
/** trx_sys.rw_trx_ids index, protected by mutex */
633+
uint32_t rw_trx_ids_slot;
632634
/** Transaction identifier (0 if no locks were acquired).
633635
Set by trx_sys_t::register_rw() or trx_resurrect() before
634636
the transaction is added to trx_sys.rw_trx_hash.
635637
Cleared in commit_in_memory() after commit_state(),
636638
trx_sys_t::deregister_rw(), release_locks(). */
637639
trx_id_t id;
640+
trx_id_t no;
638641
union
639642
{
640643
/** The largest encountered transaction identifier for which no
@@ -659,25 +662,25 @@ struct trx_t : ilist_node<>
659662
private:
660663
/** mutex protecting state and some of lock
661664
(some are protected by lock_sys.latch) */
662-
srw_spin_mutex mutex;
665+
mutable srw_spin_mutex mutex;
663666
#ifdef UNIV_DEBUG
664667
/** The owner of mutex (0 if none); protected by mutex */
665-
std::atomic<pthread_t> mutex_owner{0};
668+
mutable std::atomic<pthread_t> mutex_owner{0};
666669
#endif /* UNIV_DEBUG */
667670
public:
668671
void mutex_init() { mutex.init(); }
669672
void mutex_destroy() { mutex.destroy(); }
670673

671674
/** Acquire the mutex */
672-
void mutex_lock()
675+
void mutex_lock() const
673676
{
674677
ut_ad(!mutex_is_owner());
675678
mutex.wr_lock();
676679
assert(!mutex_owner.exchange(pthread_self(),
677680
std::memory_order_relaxed));
678681
}
679682
/** Release the mutex */
680-
void mutex_unlock()
683+
void mutex_unlock() const
681684
{
682685
assert(mutex_owner.exchange(0, std::memory_order_relaxed) ==
683686
pthread_self());

storage/innobase/include/univ.i

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,5 +497,6 @@ extern mysql_pfs_key_t index_online_log_key;
497497
extern mysql_pfs_key_t trx_sys_rw_lock_key;
498498
extern mysql_pfs_key_t lock_latch_key;
499499
extern mysql_pfs_key_t trx_rseg_latch_key;
500+
extern mysql_pfs_key_t rw_trx_ids_latch_key;
500501
# endif /* UNIV_PFS_RWLOCK */
501502
#endif /* HAVE_PSI_INTERFACE */

storage/innobase/include/ut0new.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ extern PSI_memory_key mem_key_other;
174174
extern PSI_memory_key mem_key_row_log_buf;
175175
extern PSI_memory_key mem_key_row_merge_sort;
176176
extern PSI_memory_key mem_key_std;
177+
extern PSI_memory_key mem_key_trx_sys_t_rw_trx_ids;
177178

178179
/** Setup the internal objects needed for UT_NEW() to operate.
179180
This must be called before the first call to UT_NEW(). */

storage/innobase/read/read0read.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ For details see: row_undo_mod_sec_is_unsafe() and row_purge_poss_sec()
172172
*/
173173
inline void ReadViewBase::snapshot(trx_t *trx)
174174
{
175-
trx_sys.snapshot_ids(trx, &m_ids, &m_low_limit_id, &m_low_limit_no);
175+
m_low_limit_no= trx_sys.snapshot_ids(m_ids, m_low_limit_id);
176176
if (m_ids.empty())
177177
{
178178
m_up_limit_id= m_low_limit_id;

0 commit comments

Comments
 (0)