Skip to content

Commit 8c047dc

Browse files
committed
Fix EBADF crash by keeping VZVirtioSocketConnection alive
When dialAgent() creates a gRPC connection via vsock, it dup's the file descriptor and immediately closes the VZVirtioSocketConnection. The Virtualization framework tears down the vsock endpoint when the connection is closed, which can invalidate the dup'd descriptor. Since gRPC defers NIO channel creation until the first RPC, the fd may be invalid by then, causing a precondition failure in NIO's fcntl(F_SETNOSIGPIPE). The fix introduces VsockTransport, a Sendable wrapper that retains the VZVirtioSocketConnection until Vminitd.close() explicitly shuts it down after the gRPC channel. A new dupFileDescriptor() method dups without closing, and dialAgent() passes the connection as transport.
1 parent fe7f854 commit 8c047dc

File tree

7 files changed

+269
-9
lines changed

7 files changed

+269
-9
lines changed

Sources/Containerization/VZVirtualMachine+Helpers.swift

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,15 @@ extension VZVirtualMachine {
122122
}
123123

124124
extension VZVirtualMachine {
125-
func waitForAgent(queue: DispatchQueue) async throws -> FileHandle {
125+
func waitForAgent(queue: DispatchQueue) async throws -> (FileHandle, VsockTransport) {
126126
let agentConnectionRetryCount: Int = 200
127127
let agentConnectionSleepDuration: Duration = .milliseconds(20)
128128

129129
for _ in 0...agentConnectionRetryCount {
130130
do {
131-
return try await self.connect(queue: queue, port: Vminitd.port).dupHandle()
131+
let conn = try await self.connect(queue: queue, port: Vminitd.port)
132+
let handle = try conn.dupFileDescriptor()
133+
return (handle, VsockTransport(conn))
132134
} catch {
133135
try await Task.sleep(for: agentConnectionSleepDuration)
134136
continue
@@ -139,6 +141,12 @@ extension VZVirtualMachine {
139141
}
140142

141143
extension VZVirtioSocketConnection {
144+
/// Duplicates the file descriptor and immediately closes the connection.
145+
///
146+
/// Only safe when the returned fd is used synchronously before any
147+
/// suspension point. For deferred use (e.g., gRPC/NIO), use
148+
/// ``dupFileDescriptor()`` and keep the connection alive via
149+
/// ``VsockTransport``.
142150
func dupHandle() throws -> FileHandle {
143151
let fd = dup(self.fileDescriptor)
144152
if fd == -1 {
@@ -147,6 +155,20 @@ extension VZVirtioSocketConnection {
147155
self.close()
148156
return FileHandle(fileDescriptor: fd, closeOnDealloc: false)
149157
}
158+
159+
/// Duplicates the connection's file descriptor without closing the connection.
160+
///
161+
/// The caller must keep the `VZVirtioSocketConnection` alive until the dup'd
162+
/// descriptor is no longer needed. The Virtualization framework tears down the
163+
/// vsock endpoint when the connection is closed, which invalidates dup'd
164+
/// descriptors.
165+
func dupFileDescriptor() throws -> FileHandle {
166+
let fd = dup(self.fileDescriptor)
167+
if fd == -1 {
168+
throw POSIXError.fromErrno()
169+
}
170+
return FileHandle(fileDescriptor: fd, closeOnDealloc: false)
171+
}
150172
}
151173

152174
#endif

Sources/Containerization/VZVirtualMachineInstance.swift

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,8 @@ extension VZVirtualMachineInstance: VirtualMachineInstance {
125125

126126
try await self.vm.start(queue: self.queue)
127127

128-
let agent = Vminitd(
129-
connection: try await self.vm.waitForAgent(queue: self.queue),
130-
group: self.group
131-
)
128+
let (handle, transport) = try await self.vm.waitForAgent(queue: self.queue)
129+
let agent = Vminitd(connection: handle, transport: transport, group: self.group)
132130

133131
do {
134132
if self.config.rosetta {
@@ -189,9 +187,8 @@ extension VZVirtualMachineInstance: VirtualMachineInstance {
189187
queue: queue,
190188
port: Vminitd.port
191189
)
192-
let handle = try conn.dupHandle()
193-
let agent = Vminitd(connection: handle, group: self.group)
194-
return agent
190+
let handle = try conn.dupFileDescriptor()
191+
return Vminitd(connection: handle, transport: VsockTransport(conn), group: self.group)
195192
} catch {
196193
if let err = error as? ContainerizationError {
197194
throw err

Sources/Containerization/Vminitd.swift

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,33 @@ public struct Vminitd: Sendable {
3333

3434
let client: Client
3535

36+
/// Retains the underlying vsock connection to keep the file descriptor
37+
/// valid for the gRPC client's lifetime. The Virtualization framework
38+
/// tears down the vsock endpoint when the connection is closed, which
39+
/// invalidates dup'd descriptors. Must remain open until the gRPC
40+
/// channel is shut down.
41+
private let transport: VsockTransport?
42+
3643
public init(client: Client) {
3744
self.client = client
45+
self.transport = nil
3846
}
3947

4048
public init(connection: FileHandle, group: EventLoopGroup) {
4149
self.client = .init(connection: connection, group: group)
50+
self.transport = nil
51+
}
52+
53+
init(connection: FileHandle, transport: VsockTransport, group: EventLoopGroup) {
54+
self.client = .init(connection: connection, group: group)
55+
self.transport = transport
4256
}
4357

4458
/// Close the connection to the guest agent.
4559
public func close() async throws {
60+
// Shut down the gRPC channel first (NIO closes the dup'd fd),
61+
// then close the vsock endpoint so the guest sees EOF immediately.
62+
defer { transport?.close() }
4663
try await client.close()
4764
}
4865
}

Sources/Containerization/VsockListener.swift

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ public final class VsockListener: NSObject, Sendable, AsyncSequence {
5252
#if os(macOS)
5353

5454
extension VsockListener: VZVirtioSocketListenerDelegate {
55+
/// Accepts a new vsock connection by dup'ing its fd and closing the original.
56+
///
57+
/// The dup'd fd is yielded into the `AsyncStream` for immediate consumption.
58+
/// Callers must use the `FileHandle` before any suspension point — the
59+
/// Virtualization framework tears down the vsock endpoint when the connection
60+
/// is closed, which can invalidate dup'd descriptors if the underlying kernel
61+
/// object is reclaimed. For deferred use (e.g., gRPC/NIO), see `VsockTransport`.
5562
public func listener(
5663
_: VZVirtioSocketListener, shouldAcceptNewConnection conn: VZVirtioSocketConnection,
5764
from _: VZVirtioSocketDevice
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//===----------------------------------------------------------------------===//
2+
// Copyright © 2025-2026 Apple Inc. and the Containerization project authors.
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// https://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
//===----------------------------------------------------------------------===//
16+
17+
#if os(macOS)
18+
import Foundation
19+
import Virtualization
20+
21+
/// Manages the lifecycle of a VZVirtioSocketConnection for use as a gRPC transport.
22+
///
23+
/// When a vsock connection's file descriptor is dup'd and handed to gRPC/NIO,
24+
/// the original VZVirtioSocketConnection must remain open. The Virtualization
25+
/// framework tears down the host-to-guest vsock mapping when the connection is
26+
/// closed, which invalidates dup'd descriptors. This wrapper keeps the
27+
/// connection alive and provides explicit close semantics.
28+
///
29+
/// Uses `@unchecked Sendable` because VZVirtioSocketConnection is not Sendable,
30+
/// which also prevents using Mutex (its init requires a `sending` parameter that
31+
/// conflicts with the non-Sendable connection at async call sites).
32+
final class VsockTransport: @unchecked Sendable {
33+
private var connection: VZVirtioSocketConnection?
34+
private let lock = NSLock()
35+
36+
init(_ connection: VZVirtioSocketConnection) {
37+
self.connection = connection
38+
}
39+
40+
/// Closes the underlying vsock connection, tearing down the host-side endpoint.
41+
func close() {
42+
lock.lock()
43+
defer { lock.unlock() }
44+
connection?.close()
45+
connection = nil
46+
}
47+
48+
deinit {
49+
// No lock needed: deinit runs only after all strong references are
50+
// released, so no concurrent close() call is possible.
51+
connection?.close()
52+
}
53+
}
54+
55+
#endif

Sources/Integration/ContainerTests.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3979,6 +3979,12 @@ extension IntegrationSuite {
39793979
/// where the fd must remain valid — if the VZVirtioSocketConnection is
39803980
/// closed prematurely, the fd may be invalidated by the time NIO tries
39813981
/// fcntl(F_SETNOSIGPIPE), causing a precondition failure.
3982+
///
3983+
/// The same VsockTransport fix also applies to the waitForAgent() startup
3984+
/// path (where the first RPC is setTime via TimeSyncer). That path is
3985+
/// implicitly exercised by every integration test that boots a container,
3986+
/// but isn't stress-tested with an artificial delay here because the timing
3987+
/// depends on VM boot and Rosetta setup, which aren't controllable.
39823988
func testExecDeferredConnectionStability() async throws {
39833989
let id = "test-exec-deferred-connection-stability"
39843990

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
//===----------------------------------------------------------------------===//
2+
// Copyright © 2025-2026 Apple Inc. and the Containerization project authors.
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// https://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
//===----------------------------------------------------------------------===//
16+
17+
#if os(macOS)
18+
19+
import Darwin
20+
import Foundation
21+
import Testing
22+
23+
@testable import Containerization
24+
25+
/// Tests for the VsockTransport fd lifecycle fix.
26+
///
27+
/// The Virtualization framework tears down the vsock endpoint when a
28+
/// VZVirtioSocketConnection is closed, invalidating dup'd descriptors.
29+
/// The fix keeps the connection alive via VsockTransport until the gRPC
30+
/// channel is shut down.
31+
///
32+
/// These tests use Unix socket pairs to verify:
33+
/// 1. A dup'd fd is fully functional when the original is kept alive.
34+
/// 2. The specific fcntl call that triggers the NIO crash (F_SETNOSIGPIPE)
35+
/// works on the dup'd fd.
36+
/// 3. The correct teardown order (close dup'd fd first, then original)
37+
/// preserves the connection for the peer until the original is closed.
38+
@Suite("VsockTransport tests")
39+
struct VsockTransportTests {
40+
41+
/// Creates a connected Unix socket pair. Returns (fd0, fd1).
42+
private func makeSocketPair() throws -> (Int32, Int32) {
43+
var fds: [Int32] = [0, 0]
44+
let result = socketpair(AF_UNIX, SOCK_STREAM, 0, &fds)
45+
try #require(result == 0, "socketpair should succeed")
46+
return (fds[0], fds[1])
47+
}
48+
49+
// MARK: - fd lifecycle tests
50+
51+
/// Verifies that F_SETNOSIGPIPE (the exact fcntl call where NIO crashes)
52+
/// succeeds on a dup'd fd when the original is kept alive.
53+
@Test func dupdDescriptorSupportsFcntlWhenOriginalAlive() throws {
54+
let (fd0, fd1) = try makeSocketPair()
55+
defer {
56+
close(fd0)
57+
close(fd1)
58+
}
59+
60+
let dupdFd = dup(fd0)
61+
try #require(dupdFd != -1)
62+
defer { close(dupdFd) }
63+
64+
// This is the exact operation that triggers the NIO EBADF crash
65+
// when the underlying vsock endpoint has been torn down.
66+
let result = fcntl(dupdFd, F_SETNOSIGPIPE, 1)
67+
#expect(result == 0, "F_SETNOSIGPIPE should succeed on dup'd fd when original is alive")
68+
}
69+
70+
/// Verifies that a dup'd fd can read data written by the peer when the
71+
/// original fd is kept alive.
72+
@Test func dupdDescriptorCanReadWhenOriginalAlive() throws {
73+
let (fd0, fd1) = try makeSocketPair()
74+
defer {
75+
close(fd0)
76+
close(fd1)
77+
}
78+
79+
let dupdFd = dup(fd0)
80+
try #require(dupdFd != -1)
81+
defer { close(dupdFd) }
82+
83+
// Peer writes data.
84+
let message: [UInt8] = [1, 2, 3]
85+
let writeResult = message.withUnsafeBufferPointer { buf in
86+
write(fd1, buf.baseAddress, buf.count)
87+
}
88+
try #require(writeResult == 3)
89+
90+
// Dup'd fd can read because the original keeps the connection alive.
91+
var readBuf = [UInt8](repeating: 0, count: 3)
92+
let readResult = readBuf.withUnsafeMutableBufferPointer { buf in
93+
read(dupdFd, buf.baseAddress, buf.count)
94+
}
95+
#expect(readResult == 3)
96+
#expect(readBuf == [1, 2, 3])
97+
}
98+
99+
/// Verifies the correct teardown order: closing the dup'd fd first (gRPC
100+
/// channel shutdown) does not break the connection for the peer, because
101+
/// the original fd (transport) is still alive.
102+
@Test func peerCanWriteAfterDupdFdClosedWhileOriginalAlive() throws {
103+
let (fd0, fd1) = try makeSocketPair()
104+
defer {
105+
close(fd0)
106+
close(fd1)
107+
}
108+
109+
let dupdFd = dup(fd0)
110+
try #require(dupdFd != -1)
111+
112+
// Close the dup'd fd (simulates gRPC channel shutdown).
113+
close(dupdFd)
114+
115+
// The peer can still write because the original fd keeps the
116+
// connection alive. This matters for orderly shutdown: the guest
117+
// doesn't see an unexpected EOF while the host is still tearing
118+
// down the gRPC channel.
119+
let message: [UInt8] = [42]
120+
let writeResult = message.withUnsafeBufferPointer { buf in
121+
write(fd1, buf.baseAddress, buf.count)
122+
}
123+
#expect(writeResult == 1, "Peer can still write after dup'd fd is closed")
124+
125+
// Read from the original to confirm data arrived.
126+
var readBuf = [UInt8](repeating: 0, count: 1)
127+
let readResult = readBuf.withUnsafeMutableBufferPointer { buf in
128+
read(fd0, buf.baseAddress, buf.count)
129+
}
130+
#expect(readResult == 1)
131+
#expect(readBuf == [42])
132+
}
133+
134+
/// Verifies that after both the dup'd fd and the original are closed,
135+
/// the peer sees EOF (read returns 0).
136+
@Test func peerSeesEOFAfterBothDescriptorsClosed() throws {
137+
let (fd0, fd1) = try makeSocketPair()
138+
defer { close(fd1) }
139+
140+
let dupdFd = dup(fd0)
141+
try #require(dupdFd != -1)
142+
143+
// Close dup'd fd first (gRPC shutdown), then original (transport.close()).
144+
close(dupdFd)
145+
close(fd0)
146+
147+
// Peer should see EOF.
148+
var readBuf = [UInt8](repeating: 0, count: 1)
149+
let readResult = readBuf.withUnsafeMutableBufferPointer { buf in
150+
read(fd1, buf.baseAddress, buf.count)
151+
}
152+
#expect(readResult == 0, "Peer should see EOF after both descriptors are closed")
153+
}
154+
}
155+
156+
#endif

0 commit comments

Comments
 (0)