Skip to content

Commit 0f29b0e

Browse files
committed
Merge pull request #12852 from tanyifeng:net-ip-mtu-discover
PiperOrigin-RevId: 894240043
2 parents c581431 + 03548dc commit 0f29b0e

6 files changed

Lines changed: 93 additions & 48 deletions

File tree

pkg/tcpip/tcpip.go

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -955,9 +955,8 @@ const (
955955

956956
// MTUDiscoverOption is used to set/get the path MTU discovery setting.
957957
//
958-
// NOTE: Setting this option to any other value than PMTUDiscoveryDont
959-
// is not supported and will fail as such, and getting this option will
960-
// always return PMTUDiscoveryDont.
958+
// The value controls whether the Don't Fragment (DF) bit is set on
959+
// outgoing IPv4 packets.
961960
MTUDiscoverOption
962961

963962
// MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control

pkg/tcpip/transport/internal/network/endpoint.go

Lines changed: 37 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,8 @@ type Endpoint struct {
7474
ipv4TOS uint8
7575
// +checklocks:mu
7676
ipv6TClass uint8
77+
// +checklocks:mu
78+
pmtud tcpip.PMTUDStrategy
7779

7880
// Lock ordering: mu > infoMu.
7981
infoMu sync.RWMutex `state:"nosave"`
@@ -234,6 +236,7 @@ type WriteContext struct {
234236
route *stack.Route
235237
ttl uint8
236238
tos uint8
239+
df bool
237240
}
238241

239242
func (c *WriteContext) MTU() uint32 {
@@ -362,6 +365,7 @@ func (c *WriteContext) WritePacket(pkt *stack.PacketBuffer, headerIncluded bool)
362365
Protocol: c.e.transProto,
363366
TTL: c.ttl,
364367
TOS: c.tos,
368+
DF: c.df,
365369
ExperimentOptionValue: expOptVal,
366370
}, pkt)
367371

@@ -568,11 +572,28 @@ func (e *Endpoint) AcquireContextForWrite(opts tcpip.WriteOptions) (WriteContext
568572
panic(fmt.Sprintf("invalid protocol number = %d", netProto))
569573
}
570574

575+
// Set the DF (Don't Fragment) bit based on the PMTUD strategy,
576+
// matching TCP behavior in connect.go.
577+
// Note: In gVisor, WANT and DO are treated identically (both set DF).
578+
// Linux kernel differentiates them (WANT allows local fragmentation,
579+
// DO returns EMSGSIZE), but gVisor's IPv4 layer always allows local
580+
// fragmentation for locally-generated packets regardless of DF
581+
// (see gvisor.dev/issue/5919).
582+
//
583+
// PROBE also sets DF, matching Linux ip_dont_fragment(). In Linux,
584+
// PROBE differs from DO only in that it ignores incoming ICMP
585+
// "Fragmentation Needed" messages (i.e. does not update the cached
586+
// route PMTU). Since gVisor does not implement ICMP-based PMTU
587+
// feedback for transport sockets, PROBE and DO are functionally
588+
// equivalent here.
589+
df := e.pmtud == tcpip.PMTUDiscoveryWant || e.pmtud == tcpip.PMTUDiscoveryDo || e.pmtud == tcpip.PMTUDiscoveryProbe
590+
571591
return WriteContext{
572592
e: e,
573593
route: route,
574594
ttl: ttl,
575595
tos: tos,
596+
df: df,
576597
}, nil
577598
}
578599

@@ -840,9 +861,18 @@ func (e *Endpoint) GetRemoteAddress() (tcpip.FullAddress, bool) {
840861
func (e *Endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
841862
switch opt {
842863
case tcpip.MTUDiscoverOption:
843-
// Return not supported if the value is not disabling path
844-
// MTU discovery.
845-
if tcpip.PMTUDStrategy(v) != tcpip.PMTUDiscoveryDont {
864+
// Store PMTU discovery settings. The DF bit on outgoing
865+
// packets is set accordingly in AcquireContextForWrite.
866+
// PROBE is accepted alongside DO/WANT/DONT. In Linux,
867+
// PROBE sets DF but ignores ICMP-based PMTU updates;
868+
// since gVisor lacks ICMP PMTU feedback, it behaves
869+
// identically to DO.
870+
switch tcpip.PMTUDStrategy(v) {
871+
case tcpip.PMTUDiscoveryWant, tcpip.PMTUDiscoveryDont, tcpip.PMTUDiscoveryDo, tcpip.PMTUDiscoveryProbe:
872+
e.mu.Lock()
873+
e.pmtud = tcpip.PMTUDStrategy(v)
874+
e.mu.Unlock()
875+
default:
846876
return &tcpip.ErrNotSupported{}
847877
}
848878

@@ -891,8 +921,10 @@ func (e *Endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
891921
func (e *Endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
892922
switch opt {
893923
case tcpip.MTUDiscoverOption:
894-
// The only supported setting is path MTU discovery disabled.
895-
return int(tcpip.PMTUDiscoveryDont), nil
924+
e.mu.Lock()
925+
v := int(e.pmtud)
926+
e.mu.Unlock()
927+
return v, nil
896928

897929
case tcpip.MulticastTTLOption:
898930
e.mu.Lock()

pkg/tcpip/transport/tcp/connect.go

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1031,15 +1031,16 @@ func (e *Endpoint) sendRaw(pkt *stack.PacketBuffer, flags header.TCPFlags, seq,
10311031
}
10321032
pkt.ReserveHeaderBytes(hdrSize)
10331033
return e.sendTCP(e.route, tcpFields{
1034-
id: e.TransportEndpointInfo.ID,
1035-
ttl: calculateTTL(e.route, e.ipv4TTL, e.ipv6HopLimit),
1036-
tos: e.sendTOS,
1037-
flags: flags,
1038-
seq: seq,
1039-
ack: ack,
1040-
rcvWnd: rcvWnd,
1041-
opts: options,
1042-
df: e.pmtud == tcpip.PMTUDiscoveryWant || e.pmtud == tcpip.PMTUDiscoveryDo,
1034+
id: e.TransportEndpointInfo.ID,
1035+
ttl: calculateTTL(e.route, e.ipv4TTL, e.ipv6HopLimit),
1036+
tos: e.sendTOS,
1037+
flags: flags,
1038+
seq: seq,
1039+
ack: ack,
1040+
rcvWnd: rcvWnd,
1041+
opts: options,
1042+
// PROBE sets DF like DO; see network/endpoint.go for details.
1043+
df: e.pmtud == tcpip.PMTUDiscoveryWant || e.pmtud == tcpip.PMTUDiscoveryDo || e.pmtud == tcpip.PMTUDiscoveryProbe,
10431044
expOptVal: expOptVal,
10441045
}, pkt, e.gso)
10451046
}

pkg/tcpip/transport/tcp/endpoint.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1901,15 +1901,15 @@ func (e *Endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
19011901
e.UnlockUser()
19021902

19031903
case tcpip.MTUDiscoverOption:
1904+
// PROBE is accepted alongside DO/WANT/DONT. In Linux,
1905+
// PROBE sets DF but ignores ICMP-based PMTU updates;
1906+
// since gVisor lacks ICMP PMTU feedback, it behaves
1907+
// identically to DO.
19041908
switch v := tcpip.PMTUDStrategy(v); v {
1905-
case tcpip.PMTUDiscoveryWant, tcpip.PMTUDiscoveryDont, tcpip.PMTUDiscoveryDo:
1909+
case tcpip.PMTUDiscoveryWant, tcpip.PMTUDiscoveryDont, tcpip.PMTUDiscoveryDo, tcpip.PMTUDiscoveryProbe:
19061910
e.LockUser()
19071911
e.pmtud = v
19081912
e.UnlockUser()
1909-
case tcpip.PMTUDiscoveryProbe:
1910-
// We don't support a way to ignore MTU updates; it's
1911-
// either on or it's off.
1912-
return &tcpip.ErrNotSupported{}
19131913
default:
19141914
return &tcpip.ErrNotSupported{}
19151915
}

test/syscalls/linux/tcp_socket.cc

Lines changed: 12 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -2342,38 +2342,24 @@ TEST_P(TcpSocketTest, SetPMTUD) {
23422342
SyscallSucceeds());
23432343
EXPECT_EQ(got, IP_PMTUDISC_WANT);
23442344

2345-
int set = IP_PMTUDISC_DO;
2346-
ASSERT_THAT(
2347-
setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length),
2348-
SyscallSucceeds());
2349-
ASSERT_THAT(
2350-
getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
2351-
SyscallSucceeds());
2352-
EXPECT_EQ(got, IP_PMTUDISC_DO);
2353-
set = IP_PMTUDISC_DONT;
2354-
ASSERT_THAT(
2355-
setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length),
2356-
SyscallSucceeds());
2357-
ASSERT_THAT(
2358-
getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
2359-
SyscallSucceeds());
2360-
EXPECT_EQ(got, IP_PMTUDISC_DONT);
2361-
2362-
// IP_PMTUDISC_PROBE is not supported by gVisor.
2363-
set = IP_PMTUDISC_PROBE;
2364-
if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
2345+
// Set and verify each valid PMTUD strategy.
2346+
int strategies[] = {IP_PMTUDISC_DONT, IP_PMTUDISC_WANT, IP_PMTUDISC_DO,
2347+
IP_PMTUDISC_PROBE};
2348+
for (int strategy : strategies) {
23652349
ASSERT_THAT(
2366-
setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length),
2367-
SyscallFailsWithErrno(ENOTSUP));
2368-
} else {
2369-
ASSERT_THAT(
2370-
setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length),
2350+
setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &strategy, length),
23712351
SyscallSucceeds());
23722352
ASSERT_THAT(
23732353
getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
23742354
SyscallSucceeds());
2375-
EXPECT_EQ(got, IP_PMTUDISC_PROBE);
2355+
EXPECT_EQ(got, strategy);
23762356
}
2357+
2358+
// Invalid value should fail.
2359+
int invalid = 99;
2360+
EXPECT_THAT(
2361+
setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &invalid, length),
2362+
SyscallFails());
23772363
}
23782364

23792365
TEST_P(SimpleTcpSocketTest, GetSocketAcceptConnWithShutdown) {

test/syscalls/linux/udp_socket.cc

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2048,6 +2048,33 @@ TEST_P(UdpSocketTest, ConnectToZeroPortConnected) {
20482048
SyscallFailsWithErrno(ENOTCONN));
20492049
}
20502050

2051+
TEST_P(UdpSocketTest, SetPMTUD) {
2052+
// IP_PMTUDISC_WANT should be default.
2053+
int got = -1;
2054+
socklen_t length = sizeof(got);
2055+
ASSERT_THAT(getsockopt(bind_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
2056+
SyscallSucceeds());
2057+
EXPECT_EQ(got, IP_PMTUDISC_WANT);
2058+
2059+
// Set and verify each valid PMTUD strategy.
2060+
int strategies[] = {IP_PMTUDISC_DONT, IP_PMTUDISC_WANT, IP_PMTUDISC_DO,
2061+
IP_PMTUDISC_PROBE};
2062+
for (int strategy : strategies) {
2063+
ASSERT_THAT(
2064+
setsockopt(bind_.get(), SOL_IP, IP_MTU_DISCOVER, &strategy, length),
2065+
SyscallSucceeds());
2066+
ASSERT_THAT(getsockopt(bind_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
2067+
SyscallSucceeds());
2068+
EXPECT_EQ(got, strategy);
2069+
}
2070+
2071+
// Invalid value should fail.
2072+
int invalid = 99;
2073+
EXPECT_THAT(
2074+
setsockopt(bind_.get(), SOL_IP, IP_MTU_DISCOVER, &invalid, length),
2075+
SyscallFails());
2076+
}
2077+
20512078
INSTANTIATE_TEST_SUITE_P(AllInetTests, UdpSocketTest,
20522079
::testing::Values(AF_INET, AF_INET6));
20532080

0 commit comments

Comments
 (0)