Skip to content

Commit 0b5e021

Browse files
edumazet authored and opsiff committed
tcp: use RCU lookup in __inet_hash_connect()
mainline inclusion
from mainline-v6.15-rc1
category: performance

When __inet_hash_connect() has to try many 4-tuples before
finding an available one, we see a high spinlock cost from
the many spin_lock_bh(&head->lock) performed in its loop.

This patch adds an RCU lookup to avoid the spinlock cost.

check_established() gets a new @rcu_lookup argument.
First reason is to not make any changes while head->lock
is not held.
Second reason is to not make this RCU lookup a second time
after the spinlock has been acquired.

Tested:

Server:
  ulimit -n 40000; neper/tcp_crr -T 200 -F 30000 -6 --nolog

Client:
  ulimit -n 40000; neper/tcp_crr -T 200 -F 30000 -6 --nolog -c -H server

Before series:

  utime_start=0.288582
  utime_end=1.548707
  stime_start=20.637138
  stime_end=2002.489845
  num_transactions=484453
  latency_min=0.156279245
  latency_max=20.922042756
  latency_mean=1.546521274
  latency_stddev=3.936005194
  num_samples=312537
  throughput=47426.00

perf top on the client:

  49.54%  [kernel]  [k] _raw_spin_lock
  25.87%  [kernel]  [k] _raw_spin_lock_bh
   5.97%  [kernel]  [k] queued_spin_lock_slowpath
   5.67%  [kernel]  [k] __inet_hash_connect
   3.53%  [kernel]  [k] __inet6_check_established
   3.48%  [kernel]  [k] inet6_ehashfn
   0.64%  [kernel]  [k] rcu_all_qs

After this series:

  utime_start=0.271607
  utime_end=3.847111
  stime_start=18.407684
  stime_end=1997.485557
  num_transactions=1350742
  latency_min=0.014131929
  latency_max=17.895073144
  latency_mean=0.505675853  # Nice reduction of latency metrics
  latency_stddev=2.125164772
  num_samples=307884
  throughput=139866.80      # 190 % increase

perf top on client:

  56.86%  [kernel]  [k] __inet6_check_established
  17.96%  [kernel]  [k] __inet_hash_connect
  13.88%  [kernel]  [k] inet6_ehashfn
   2.52%  [kernel]  [k] rcu_all_qs
   2.01%  [kernel]  [k] __cond_resched
   0.41%  [kernel]  [k] _raw_spin_lock

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Tested-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250302124237.3913746-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit 86c2bc2)
Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
Change-Id: Icf547979f93422af63cd937427ead38f616f0b4d
Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
1 parent 5c972a0 commit 0b5e021

3 files changed

Lines changed: 50 additions & 29 deletions

File tree

include/net/inet_hashtables.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -529,7 +529,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
529529
struct sock *sk, u64 port_offset,
530530
int (*check_established)(struct inet_timewait_death_row *,
531531
struct sock *, __u16,
532-
struct inet_timewait_sock **));
532+
struct inet_timewait_sock **,
533+
bool rcu_lookup));
533534

534535
int inet_hash_connect(struct inet_timewait_death_row *death_row,
535536
struct sock *sk);

net/ipv4/inet_hashtables.c

Lines changed: 35 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -538,7 +538,8 @@ EXPORT_SYMBOL_GPL(__inet_lookup_established);
538538
/* called with local bh disabled */
539539
static int __inet_check_established(struct inet_timewait_death_row *death_row,
540540
struct sock *sk, __u16 lport,
541-
struct inet_timewait_sock **twp)
541+
struct inet_timewait_sock **twp,
542+
bool rcu_lookup)
542543
{
543544
struct inet_hashinfo *hinfo = death_row->hashinfo;
544545
struct inet_sock *inet = inet_sk(sk);
@@ -557,17 +558,17 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
557558
struct sock *sk2;
558559
spinlock_t *lock;
559560

560-
rcu_read_lock();
561-
sk_nulls_for_each(sk2, node, &head->chain) {
562-
if (sk2->sk_hash != hash ||
563-
!inet_match(net, sk2, acookie, ports, dif, sdif))
564-
continue;
565-
if (sk2->sk_state == TCP_TIME_WAIT)
566-
break;
567-
rcu_read_unlock();
568-
return -EADDRNOTAVAIL;
561+
if (rcu_lookup) {
562+
sk_nulls_for_each(sk2, node, &head->chain) {
563+
if (sk2->sk_hash != hash ||
564+
!inet_match(net, sk2, acookie, ports, dif, sdif))
565+
continue;
566+
if (sk2->sk_state == TCP_TIME_WAIT)
567+
break;
568+
return -EADDRNOTAVAIL;
569+
}
570+
return 0;
569571
}
570-
rcu_read_unlock();
571572

572573
lock = inet_ehash_lockp(hinfo, hash);
573574
spin_lock(lock);
@@ -1007,7 +1008,8 @@ static u32 *table_perturb;
10071008
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10081009
struct sock *sk, u64 port_offset,
10091010
int (*check_established)(struct inet_timewait_death_row *,
1010-
struct sock *, __u16, struct inet_timewait_sock **))
1011+
struct sock *, __u16, struct inet_timewait_sock **,
1012+
bool rcu_lookup))
10111013
{
10121014
struct inet_hashinfo *hinfo = death_row->hashinfo;
10131015
struct inet_bind_hashbucket *head, *head2;
@@ -1024,7 +1026,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10241026

10251027
if (port) {
10261028
local_bh_disable();
1027-
ret = check_established(death_row, sk, port, NULL);
1029+
ret = check_established(death_row, sk, port, NULL, false);
10281030
local_bh_enable();
10291031
return ret;
10301032
}
@@ -1057,6 +1059,21 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10571059
continue;
10581060
head = &hinfo->bhash[inet_bhashfn(net, port,
10591061
hinfo->bhash_size)];
1062+
rcu_read_lock();
1063+
hlist_for_each_entry_rcu(tb, &head->chain, node) {
1064+
if (!inet_bind_bucket_match(tb, net, port, l3mdev))
1065+
continue;
1066+
if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) {
1067+
rcu_read_unlock();
1068+
goto next_port;
1069+
}
1070+
if (!check_established(death_row, sk, port, &tw, true))
1071+
break;
1072+
rcu_read_unlock();
1073+
goto next_port;
1074+
}
1075+
rcu_read_unlock();
1076+
10601077
spin_lock_bh(&head->lock);
10611078

10621079
/* Does not bother with rcv_saddr checks, because
@@ -1066,12 +1083,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10661083
if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
10671084
if (tb->fastreuse >= 0 ||
10681085
tb->fastreuseport >= 0)
1069-
goto next_port;
1086+
goto next_port_unlock;
10701087
WARN_ON(hlist_empty(&tb->bhash2));
10711088
if (!check_established(death_row, sk,
1072-
port, &tw))
1089+
port, &tw, false))
10731090
goto ok;
1074-
goto next_port;
1091+
goto next_port_unlock;
10751092
}
10761093
}
10771094

@@ -1085,8 +1102,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10851102
tb->fastreuse = -1;
10861103
tb->fastreuseport = -1;
10871104
goto ok;
1088-
next_port:
1105+
next_port_unlock:
10891106
spin_unlock_bh(&head->lock);
1107+
next_port:
10901108
cond_resched();
10911109
}
10921110

net/ipv6/inet6_hashtables.c

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,8 @@ EXPORT_SYMBOL_GPL(inet6_lookup);
263263

264264
static int __inet6_check_established(struct inet_timewait_death_row *death_row,
265265
struct sock *sk, const __u16 lport,
266-
struct inet_timewait_sock **twp)
266+
struct inet_timewait_sock **twp,
267+
bool rcu_lookup)
267268
{
268269
struct inet_hashinfo *hinfo = death_row->hashinfo;
269270
struct inet_sock *inet = inet_sk(sk);
@@ -281,17 +282,18 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
281282
struct sock *sk2;
282283
spinlock_t *lock;
283284

284-
rcu_read_lock();
285-
sk_nulls_for_each(sk2, node, &head->chain) {
286-
if (sk2->sk_hash != hash ||
287-
!inet6_match(net, sk2, saddr, daddr, ports, dif, sdif))
288-
continue;
289-
if (sk2->sk_state == TCP_TIME_WAIT)
290-
break;
291-
rcu_read_unlock();
292-
return -EADDRNOTAVAIL;
285+
if (rcu_lookup) {
286+
sk_nulls_for_each(sk2, node, &head->chain) {
287+
if (sk2->sk_hash != hash ||
288+
!inet6_match(net, sk2, saddr, daddr,
289+
ports, dif, sdif))
290+
continue;
291+
if (sk2->sk_state == TCP_TIME_WAIT)
292+
break;
293+
return -EADDRNOTAVAIL;
294+
}
295+
return 0;
293296
}
294-
rcu_read_unlock();
295297

296298
lock = inet_ehash_lockp(hinfo, hash);
297299
spin_lock(lock);

0 commit comments

Comments
 (0)