@@ -340,14 +340,6 @@ struct rw_trx_hash_element_t
340340
341341
342342 trx_id_t id; /* lf_hash_init() relies on this to be first in the struct */
343-
344- /* *
345- Transaction serialization number.
346-
347- Assigned shortly before the transaction is moved to COMMITTED_IN_MEMORY
348- state. Initially set to TRX_ID_MAX.
349- */
350- Atomic_counter<trx_id_t > no;
351343 trx_t *trx;
352344 srw_mutex mutex;
353345};
@@ -443,7 +435,6 @@ class rw_trx_hash_t
443435 ut_ad (element->trx == 0 );
444436 element->trx = trx;
445437 element->id = trx->id ;
446- element->no = TRX_ID_MAX;
447438 trx->rw_trx_hash_element = element;
448439 }
449440
@@ -512,7 +503,6 @@ class rw_trx_hash_t
512503 if (element->trx )
513504 validate_element (element->trx );
514505 element->mutex .wr_unlock ();
515- ut_ad (element->id < element->no );
516506 return arg->action (element, arg->argument );
517507 }
518508#endif
@@ -849,6 +839,119 @@ class thread_safe_trx_ilist_t
849839 alignas (CPU_LEVEL1_DCACHE_LINESIZE) ilist<trx_t > trx_list;
850840};
851841
842+ /* *
843+ Active read-write transaction identifiers and serialisation numbers container.
844+
845+ Unlike rw_trx_hash_t, which is optimized for direct lookup, this
846+ structure is optimized for compact storage and traversal of active
847+ transactions by MVCC read view construction.
848+
849+ The vector may contain empty slots corresponding to idle or read-only
850+ transactions that currently do not own an active read-write trx_id.
851+ Such slots are skipped during traversal.
852+ */
853+ class rw_trx_vector
854+ {
  /* Latching protocol, as used by the methods of this class:
     register_trx() and deregister_trx() add or remove slots and hold latch
     in exclusive (wr) mode; assign_new_trx_no(), snapshot_ids(),
     register_rw() and deregister_rw() only load or store the Atomic_relaxed
     fields of already existing slots and hold latch in shared (rd) mode. */

  /* One slot per registered trx_t. Both id and no are initialised to
     TRX_ID_MAX; register_rw() and assign_new_trx_no() assert that value
     before storing a real one, and deregister_rw() restores it. */
855+ struct rw_trx_id
856+ {
857+ Atomic_relaxed<trx_id_t > id{TRX_ID_MAX};
858+ Atomic_relaxed<trx_id_t > no{TRX_ID_MAX};
859+ trx_t *trx;
  /* NOTE(review): this single-argument constructor is not explicit. The only
     construction visible in this class is ids.emplace_back(trx), so marking
     it explicit looks safe - confirm against other users. */
860+ rw_trx_id (trx_t *t): trx(t) {}
861+ };
  /* The slot array and the latch are each aligned to
     CPU_LEVEL1_DCACHE_LINESIZE. latch is mutable because the const method
     snapshot_ids() has to acquire it. */
862+ alignas (CPU_LEVEL1_DCACHE_LINESIZE)
863+ std::vector<rw_trx_id, ut_allocator<rw_trx_id>>
864+ ids{ut_allocator<rw_trx_id>(mem_key_trx_sys_t_rw_trx_ids)};
865+ alignas (CPU_LEVEL1_DCACHE_LINESIZE) mutable srw_spin_lock_low latch;
866+
867+ public:
  /* Stores the serialisation number into the slot of trx.
     Debug assertions require that the slot belongs to trx, already carries
     trx->id and has no serialisation number yet (no == TRX_ID_MAX).
     @param trx  transaction whose rw_trx_ids_slot selects the slot
     @param no   serialisation number to store */
868+ void assign_new_trx_no (const trx_t *trx, trx_id_t no) noexcept
869+ {
870+ latch.rd_lock ();
871+ ut_ad (trx->rw_trx_ids_slot < ids.size ());
872+ ut_ad (ids[trx->rw_trx_ids_slot ].trx == trx);
873+ ut_ad (ids[trx->rw_trx_ids_slot ].id == trx->id );
874+ ut_ad (ids[trx->rw_trx_ids_slot ].no == TRX_ID_MAX);
875+ ids[trx->rw_trx_ids_slot ].no = no;
876+ latch.rd_unlock ();
877+ }
  /* Collects the identifiers of all slots whose id is below max_trx_id.
     view_ids is cleared first and then receives one entry per such slot.
     @param[out] view_ids    receives the collected identifiers
     @param[in]  max_trx_id  exclusive upper bound for collected identifiers
     @return the smallest no among the collected slots, or max_trx_id if no
     collected slot has a smaller no */
878+ trx_id_t snapshot_ids (trx_ids_t &view_ids,
879+ const trx_id_t max_trx_id) const noexcept
880+ {
881+ trx_id_t min_trx_no{max_trx_id};
882+ view_ids.clear ();
883+ latch.rd_lock ();
  /* NOTE(review): ids.size() can only be read under the latch, so this
     reserve() runs while the latch is held; presumably it may allocate when
     view_ids has insufficient capacity - confirm that this is acceptable. */
884+ view_ids.reserve (ids.size ());
885+ for (const auto &it : ids)
886+ {
  /* id and no are loaded separately. Idle slots hold TRX_ID_MAX and are
     expected to fail the test below (assumes max_trx_id <= TRX_ID_MAX). */
887+ trx_id_t id{it.id };
888+ if (id < max_trx_id)
889+ {
890+ view_ids.push_back (id);
891+ const trx_id_t no{it.no };
892+ if (no < min_trx_no)
893+ min_trx_no= no;
894+ }
895+ }
896+ latch.rd_unlock ();
897+ return min_trx_no;
898+ }
  /* Publishes trx->id in the slot of trx.
     Debug assertions require the slot to be idle: both id and no must still
     be TRX_ID_MAX. */
899+ void register_rw (const trx_t *trx) noexcept
900+ {
901+ latch.rd_lock ();
902+ ut_ad (trx->rw_trx_ids_slot < ids.size ());
903+ ut_ad (ids[trx->rw_trx_ids_slot ].trx == trx);
904+ ut_ad (ids[trx->rw_trx_ids_slot ].id == TRX_ID_MAX);
905+ ut_ad (ids[trx->rw_trx_ids_slot ].no == TRX_ID_MAX);
906+ ids[trx->rw_trx_ids_slot ].id = trx->id ;
907+ latch.rd_unlock ();
908+ }
  /* Marks the slot of trx idle again by resetting id and then no to
     TRX_ID_MAX. The slot itself stays in the vector. */
909+ void deregister_rw (const trx_t *trx) noexcept
910+ {
911+ latch.rd_lock ();
912+ ut_ad (trx->rw_trx_ids_slot < ids.size ());
913+ rw_trx_id &slot= ids[trx->rw_trx_ids_slot ];
914+ ut_ad (slot.trx == trx);
915+ ut_ad (slot.id == trx->id );
916+ slot.id = TRX_ID_MAX;
917+ slot.no = TRX_ID_MAX;
918+ latch.rd_unlock ();
919+ }
  /* Appends a new idle slot for trx and records its index in
     trx->rw_trx_ids_slot. The debug assertion requires that trx holds the
     "no slot" value std::numeric_limits<uint32_t>::max() on entry. */
920+ void register_trx (trx_t *trx) noexcept
921+ {
922+ ut_ad (trx->rw_trx_ids_slot == std::numeric_limits<uint32_t >::max ());
923+ latch.wr_lock ();
  /* NOTE(review): ids.size() is narrowed to uint32_t without a check that it
     stays below the "no slot" value - confirm that the number of registered
     transactions is bounded elsewhere. */
924+ trx->rw_trx_ids_slot = static_cast <uint32_t >(ids.size ());
925+ ids.emplace_back (trx);
926+ latch.wr_unlock ();
927+ }
  /* Removes the slot of trx. Unless it is the last slot, the last slot is
     assigned over it and the owner of that last slot gets its
     rw_trx_ids_slot updated, so the vector stays dense. Then the last slot
     is popped. */
928+ void deregister_trx (trx_t *trx) noexcept
929+ {
930+ latch.wr_lock ();
931+ ut_ad (trx->rw_trx_ids_slot < ids.size ());
932+ ut_ad (ids[trx->rw_trx_ids_slot ].trx == trx);
933+ if (trx->rw_trx_ids_slot + 1 < ids.size ())
934+ {
  /* Remember the owner before the assignment so that its index can be
     redirected to the slot being vacated. */
935+ trx_t *move_trx= ids.back ().trx ;
936+ ids[trx->rw_trx_ids_slot ]= std::move (ids.back ());
937+ move_trx->rw_trx_ids_slot = trx->rw_trx_ids_slot ;
938+ }
939+ ids.pop_back ();
940+ latch.wr_unlock ();
  /* The index of the removed transaction is reset after the latch has been
     released. */
941+ trx->rw_trx_ids_slot = std::numeric_limits<uint32_t >::max ();
942+ }
  /* Initialises the latch; asserts in debug builds that no slot exists. */
943+ void create () noexcept
944+ {
945+ ut_ad (ids.size () == 0 );
946+ latch.init ();
947+ }
  /* Destroys the latch; asserts in debug builds that no slot is left. */
948+ void destroy () noexcept
949+ {
950+ ut_ad (ids.size () == 0 );
951+ latch.destroy ();
952+ }
953+ };
954+
852955/* * The transaction system central memory data structure. */
853956class trx_sys_t
854957{
@@ -876,6 +979,15 @@ class trx_sys_t
876979 /* * False if there is no undo log to purge or rollback */
877980 bool undo_log_nonempty;
878981public:
982+ /* *
983+ Collection of active read-write transaction identifiers and serialisation
984+ numbers used for MVCC snapshot creation.
985+
986+ This complements rw_trx_hash with a traversal-friendly representation
987+ optimized for collecting active transaction ids.
988+ */
989+ rw_trx_vector rw_trx_ids;
990+
879991 /* * List of all transactions. */
880992 thread_safe_trx_ilist_t trx_list;
881993
@@ -1014,7 +1126,7 @@ class trx_sys_t
10141126 next call to trx_sys.get_new_trx_id()
10151127 */
10161128
1017- trx_id_t get_max_trx_id ()
1129+ trx_id_t get_max_trx_id () const noexcept
10181130 {
  /* Returns the current value of m_max_trx_id. The added const qualifier
     lets the const member snapshot_ids() call this getter. */
10191131 return m_max_trx_id;
10201132 }
@@ -1037,7 +1149,7 @@ class trx_sys_t
10371149 Allocates and assigns new transaction serialisation number.
10381150
10391151 There's a gap between m_max_trx_id increment and transaction serialisation
1040- number becoming visible through rw_trx_hash . While we're in this gap
1152+ number becoming visible through rw_trx_ids . While we're in this gap
10411153 concurrent thread may come and do MVCC snapshot without seeing allocated
10421154 but not yet assigned serialisation number. Then at some point purge thread
10431155 may clone this view. As a result it won't see newly allocated serialisation
@@ -1047,58 +1159,44 @@ class trx_sys_t
10471159 m_rw_trx_hash_version is intended to solve this problem. MVCC snapshot has
10481160 to wait until m_max_trx_id == m_rw_trx_hash_version, which effectively
10491161 means that all transaction serialisation numbers up to m_max_trx_id are
1050- available through rw_trx_hash .
1162+ available through rw_trx_ids .
10511163
10521164 We rely on refresh_rw_trx_hash_version() to issue RELEASE memory barrier so
1053- that m_rw_trx_hash_version increment happens after
1054- trx->rw_trx_hash_element->no becomes visible through rw_trx_hash .
1165+ that m_rw_trx_hash_version increment happens after transaction serialisation
1166+ number becomes visible through rw_trx_ids .
10551167
10561168 @param trx transaction
10571169 */
1058- void assign_new_trx_no (trx_t *trx)
1170+ trx_id_t assign_new_trx_no (trx_t *trx)
10591171 {
1060- trx->rw_trx_hash_element ->no = get_new_trx_id_no_refresh ();
1172+ trx_id_t no= get_new_trx_id_no_refresh ();
1173+ rw_trx_ids.assign_new_trx_no (trx, no);
10611174 refresh_rw_trx_hash_version ();
1175+ return no;
10621176 }
10631177
10641178
10651179 /* *
10661180 Takes MVCC snapshot.
10671181
1068- To reduce malloc probability we reserve rw_trx_hash.size() + 32 elements
1069- in ids.
1070-
10711182 For details about get_rw_trx_hash_version() != get_max_trx_id() spin
10721183 @sa register_rw() and @sa assign_new_trx_no().
10731184
10741185 We rely on get_rw_trx_hash_version() to issue ACQUIRE memory barrier so
1075- that loading of m_rw_trx_hash_version happens before accessing rw_trx_hash.
1076-
1077- To optimise snapshot creation rw_trx_hash.iterate() is being used instead
1078- of rw_trx_hash.iterate_no_dups(). It means that some transaction
1079- identifiers may appear multiple times in ids.
1186+ that loading of m_rw_trx_hash_version happens before accessing rw_trx_ids.
10801187
1081- @param[in,out] caller_trx used to get access to rw_trx_hash_pins
10821188 @param[out] ids array to store registered transaction identifiers
10831189 @param[out] max_trx_id variable to store m_max_trx_id value
1084- @param[out] mix_trx_no variable to store min(no) value
1190+
1191+ @return min(no)
10851192 */
10861193
1087- void snapshot_ids (trx_t *caller_trx, trx_ids_t *ids, trx_id_t *max_trx_id,
1088- trx_id_t *min_trx_no)
1194+ trx_id_t snapshot_ids (trx_ids_t &ids, trx_id_t &max_trx_id) const noexcept
10891195 {
1090- snapshot_ids_arg arg (ids);
1091-
  /* Spin until get_rw_trx_hash_version() equals get_max_trx_id(); the
     version that satisfied the test is left in max_trx_id. */
1092- while ((arg.m_id = get_rw_trx_hash_version ()) != get_max_trx_id ())
1196+ while ((max_trx_id= get_rw_trx_hash_version ()) != get_max_trx_id ())
10931197 ut_delay (1 );
1094- arg.m_no = arg.m_id ;
1095-
1096- ids->clear ();
1097- ids->reserve (rw_trx_hash.size () + 32 );
1098- rw_trx_hash.iterate(caller_trx, copy_one_id, &arg);
10991198
  /* Clearing, reserving and filling ids, as well as computing min(no), is
     now delegated to rw_trx_ids.snapshot_ids(), whose result is returned
     instead of being written through an output pointer. */
1100- *max_trx_id= arg.m_id ;
1101- *min_trx_no= arg.m_no ;
1199+ return rw_trx_ids.snapshot_ids (ids, max_trx_id);
11021200 }
11031201
11041202
@@ -1149,7 +1247,7 @@ class trx_sys_t
11491247 Transaction becomes visible to MVCC.
11501248
11511249 There's a gap between m_max_trx_id increment and transaction becoming
1152- visible through rw_trx_hash . While we're in this gap concurrent thread may
1250+ visible through rw_trx_ids . While we're in this gap concurrent thread may
11531251 come and do MVCC snapshot. As a result concurrent read view will be able to
11541252 observe records owned by this transaction even before it was committed.
11551253
@@ -1166,20 +1264,23 @@ class trx_sys_t
11661264 void register_rw (trx_t *trx)
11671265 {
  /* Order after this change: allocate trx->id, publish it in rw_trx_ids,
     call refresh_rw_trx_hash_version(), and only then insert the
     transaction into rw_trx_hash. */
11681266 trx->id = get_new_trx_id_no_refresh ();
1169- rw_trx_hash. insert (trx);
1267+ rw_trx_ids. register_rw (trx);
11701268 refresh_rw_trx_hash_version ();
1269+ rw_trx_hash.insert (trx);
11711270 }
11721271
11731272
11741273 /* *
11751274 Deregisters read-write transaction.
11761275
1177- Transaction is removed from rw_trx_hash, which releases all implicit locks.
1178- MVCC snapshot won't see this transaction anymore.
1276+ After this call the transaction is no longer visible as active to MVCC read
1277+ views created subsequently, and all implicit locks held by the transaction
1278+ have been released.
11791279 */
11801280
1181- void deregister_rw (trx_t *trx)
1281+ void deregister_rw (trx_t *trx) noexcept
11821282 {
  /* The rw_trx_ids slot is reset first, then the transaction is erased
     from rw_trx_hash. */
1283+ rw_trx_ids.deregister_rw (trx);
11831284 rw_trx_hash.erase (trx);
11841285 }
11851286
@@ -1204,6 +1305,7 @@ class trx_sys_t
12041305 void register_trx (trx_t *trx)
12051306 {
  /* Adds trx to trx_list and then gives it a slot in rw_trx_ids. */
12061307 trx_list.push_front (*trx);
1308+ rw_trx_ids.register_trx (trx);
12071309 }
12081310
12091311
@@ -1214,6 +1316,7 @@ class trx_sys_t
12141316 */
12151317 void deregister_trx (trx_t *trx)
12161318 {
  /* Reverse order of register_trx(): the rw_trx_ids slot is released
     before trx is removed from trx_list. */
1319+ rw_trx_ids.deregister_trx (trx);
12171320 trx_list.remove (*trx);
12181321 }
12191322
@@ -1266,33 +1369,8 @@ class trx_sys_t
12661369private:
12671370 static my_bool find_same_or_older_callback (void *el, void *i) noexcept ;
12681371
1269-
1270- struct snapshot_ids_arg
1271- {
  /* Removed by this change. It carried the output vector (m_ids), the
     identifier bound (m_id) and the running minimum (m_no) into the
     copy_one_id() callback. */
1272- snapshot_ids_arg (trx_ids_t *ids): m_ids(ids) {}
1273- trx_ids_t *m_ids;
1274- trx_id_t m_id;
1275- trx_id_t m_no;
1276- };
1277-
1278-
1279- static my_bool copy_one_id (void * el, void *a)
1280- {
  /* Removed by this change. For an element whose id is below arg->m_id it
     appended the id to arg->m_ids and lowered arg->m_no to the element's no
     when that was smaller; it always returned 0. The loop in
     rw_trx_vector::snapshot_ids() performs the same per-entry steps. */
1281- auto element= static_cast <const rw_trx_hash_element_t *>(el);
1282- auto arg= static_cast <snapshot_ids_arg*>(a);
1283- if (element->id < arg->m_id )
1284- {
1285- trx_id_t no= element->no ;
1286- arg->m_ids ->push_back (element->id );
1287- if (no < arg->m_no )
1288- arg->m_no = no;
1289- }
1290- return 0 ;
1291- }
1292-
1293-
12941372 /* * Getter for m_rw_trx_hash_version, must issue ACQUIRE memory barrier. */
1295- trx_id_t get_rw_trx_hash_version ()
1373+ trx_id_t get_rw_trx_hash_version () const noexcept
12961374 {
  /* Acquire load of m_rw_trx_hash_version. The added const qualifier lets
     the const member snapshot_ids() call this getter. */
12971375 return m_rw_trx_hash_version.load (std::memory_order_acquire);
12981376 }
0 commit comments