Skip to content

Commit ac37733

Browse files
committed
[fix] SSL write data loss on non-blocking partial flush (#242)
write() unconditionally called netWriteData.clear() after a non-blocking flushData() that may not have flushed all encrypted bytes. This discarded unflushed TLS records, corrupting the encrypted stream and causing 'Broken pipe' or 'Connection reset' errors on subsequent writes — most commonly seen during 'gem push' of large gems over HTTPS. Fix: replace clear() with compact() which preserves unflushed bytes by moving them to the front of the buffer before engine.wrap() appends new encrypted output. Additionally, sysreadImpl() now flushes pending netWriteData before reading. After write_nonblock, encrypted bytes could remain unsent; without flushing first the server would never receive the complete request body (e.g. net/http POST), causing it to time out or reset.
1 parent 84ae438 commit ac37733

File tree

2 files changed

+300
-1
lines changed

2 files changed

+300
-1
lines changed

src/main/java/org/jruby/ext/openssl/SSLSocket.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,9 @@ public int write(ByteBuffer src, boolean blocking) throws SSLException, IOExcept
702702
if ( netWriteData.hasRemaining() ) {
703703
flushData(blocking);
704704
}
705-
netWriteData.clear();
705+
// use compact() to preserve any encrypted bytes that flushData could not send (non-blocking partial write)
706+
// clear() would discard them, corrupting the TLS record stream:
707+
netWriteData.compact();
706708
final SSLEngineResult result = engine.wrap(src, netWriteData);
707709
if ( result.getStatus() == SSLEngineResult.Status.CLOSED ) {
708710
throw getRuntime().newIOError("closed SSL engine");
@@ -867,6 +869,14 @@ private IRubyObject sysreadImpl(final ThreadContext context, final IRubyObject l
867869
}
868870

869871
try {
872+
// Flush any pending encrypted write data before reading.
873+
// After write_nonblock, encrypted bytes may remain in netWriteData that haven't been sent to the server.
874+
// If we read without flushing, the server may not have received the complete request
875+
// (e.g. net/http POST body) and will not send a response.
876+
if ( engine != null && netWriteData.hasRemaining() ) {
877+
flushData(blocking);
878+
}
879+
870880
// So we need to make sure to only block when there is no data left to process
871881
if ( engine == null || ! ( appReadData.hasRemaining() || netReadData.position() > 0 ) ) {
872882
final Object ex = waitSelect(SelectionKey.OP_READ, blocking, exception);
Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
require File.expand_path('test_helper', File.dirname(__FILE__))
2+
3+
class TestWriteNonblock < TestCase
4+
5+
include SSLTestHelper
6+
7+
# Reproduces the data loss: write a large payload via write_nonblock
8+
# with a slow-reading server (small recv buffer), then read the response.
9+
# The server echoes back the byte count it received. If bytes were lost,
10+
# the count will be less than expected.
11+
def test_write_nonblock_data_integrity
12+
expected_size = 256 * 1024 # 256KB — large enough to overflow TCP buffers
13+
14+
# Custom server: reads all data until a blank line, counts bytes, sends back the count.
15+
# Deliberately slow: small recv buffer + sleep between reads to create backpressure.
16+
server_proc = Proc.new do |context, ssl|
17+
begin
18+
total = 0
19+
while (line = ssl.gets)
20+
break if line.strip.empty?
21+
total += line.bytesize
22+
end
23+
ssl.write("RECEIVED #{total}\n")
24+
rescue IOError, OpenSSL::SSL::SSLError => e
25+
# If the TLS stream is corrupted, the server may get an error here
26+
warn "Server error: #{e.class}: #{e.message}" if $VERBOSE
27+
ensure
28+
ssl.close rescue nil
29+
end
30+
end
31+
32+
[OpenSSL::SSL::TLS1_2_VERSION, OpenSSL::SSL::TLS1_3_VERSION].each do |tls_version|
33+
ctx_proc = Proc.new { |ctx| ctx.min_version = ctx.max_version = tls_version }
34+
start_server0(PORT, OpenSSL::SSL::VERIFY_NONE, true,
35+
:ctx_proc => ctx_proc, :server_proc => server_proc) do |server, port|
36+
sock = TCPSocket.new("127.0.0.1", port)
37+
# Small send buffer to increase the chance of partial non-blocking writes
38+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 4096)
39+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_RCVBUF, 4096)
40+
41+
ssl = OpenSSL::SSL::SSLSocket.new(sock)
42+
ssl.sync_close = true
43+
ssl.connect
44+
45+
# Build a large payload: many lines totaling expected_size bytes
46+
line = "X" * 1023 + "\n" # 1024 bytes per line
47+
lines_needed = expected_size / line.bytesize
48+
payload = line * lines_needed
49+
actual_payload_size = payload.bytesize
50+
51+
# Write it all using write_nonblock (as net/http does)
52+
written = 0
53+
while written < payload.bytesize
54+
begin
55+
n = ssl.write_nonblock(payload.byteslice(written, payload.bytesize - written))
56+
written += n
57+
rescue IO::WaitWritable, OpenSSL::SSL::SSLErrorWaitWritable
58+
IO.select(nil, [ssl], nil, 5)
59+
retry
60+
end
61+
end
62+
63+
# Send terminator
64+
ssl.write("\n")
65+
66+
# Read the response (this is where the flush-before-read matters)
67+
response = nil
68+
deadline = Time.now + 10
69+
while Time.now < deadline
70+
begin
71+
response = ssl.gets
72+
break if response
73+
rescue IO::WaitReadable, OpenSSL::SSL::SSLErrorWaitReadable
74+
IO.select([ssl], nil, nil, 5)
75+
end
76+
end
77+
78+
assert_not_nil response, "No response from server (TLS #{ssl.ssl_version})"
79+
assert_match(/^RECEIVED (\d+)/, response)
80+
received = response[/RECEIVED (\d+)/, 1].to_i
81+
assert_equal actual_payload_size, received,
82+
"Server received #{received} bytes but we sent #{actual_payload_size} " \
83+
"(lost #{actual_payload_size - received} bytes) on #{ssl.ssl_version}"
84+
85+
ssl.close
86+
end
87+
end
88+
end
89+
90+
# Simpler test: write_nonblock followed by sysread should work.
91+
# This is the net/http pattern: POST body via write, then read response.
92+
def test_write_nonblock_then_sysread
93+
server_proc = Proc.new do |context, ssl|
94+
begin
95+
data = +""
96+
while (line = ssl.gets)
97+
break if line.strip == "END"
98+
data << line
99+
end
100+
ssl.write("OK:#{data.bytesize}\n")
101+
rescue IOError, OpenSSL::SSL::SSLError
102+
ensure
103+
ssl.close rescue nil
104+
end
105+
end
106+
107+
ctx_proc = Proc.new { |ctx| ctx.min_version = ctx.max_version = OpenSSL::SSL::TLS1_3_VERSION }
108+
start_server0(PORT, OpenSSL::SSL::VERIFY_NONE, true,
109+
:ctx_proc => ctx_proc, :server_proc => server_proc) do |server, port|
110+
sock = TCPSocket.new("127.0.0.1", port)
111+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 4096)
112+
ssl = OpenSSL::SSL::SSLSocket.new(sock)
113+
ssl.sync_close = true
114+
ssl.connect
115+
116+
# Write via write_nonblock
117+
payload = "Y" * 50_000 + "\n"
118+
written = 0
119+
while written < payload.bytesize
120+
begin
121+
n = ssl.write_nonblock(payload.byteslice(written, payload.bytesize - written))
122+
written += n
123+
rescue IO::WaitWritable, OpenSSL::SSL::SSLErrorWaitWritable
124+
IO.select(nil, [ssl], nil, 5)
125+
retry
126+
end
127+
end
128+
ssl.write("END\n")
129+
130+
# Now read response via sysread (the net/http pattern)
131+
IO.select([ssl], nil, nil, 10)
132+
response = ssl.sysread(1024)
133+
assert_match(/^OK:(\d+)/, response)
134+
received = response[/OK:(\d+)/, 1].to_i
135+
assert_equal payload.bytesize, received, "Server received #{received} bytes but sent #{payload.bytesize}"
136+
137+
ssl.close
138+
end
139+
end
140+
141+
# Test that multiple write_nonblock calls preserve all data even under
142+
# buffer pressure (many small writes)
143+
def test_many_small_write_nonblock_calls
144+
server_proc = Proc.new do |context, ssl|
145+
begin
146+
total = 0
147+
while (line = ssl.gets)
148+
break if line.strip == "DONE"
149+
total += line.bytesize
150+
end
151+
ssl.write("TOTAL:#{total}\n")
152+
rescue IOError, OpenSSL::SSL::SSLError
153+
ensure
154+
ssl.close rescue nil
155+
end
156+
end
157+
158+
ctx_proc = Proc.new { |ctx| ctx.min_version = ctx.max_version = OpenSSL::SSL::TLS1_3_VERSION }
159+
start_server0(PORT, OpenSSL::SSL::VERIFY_NONE, true,
160+
:ctx_proc => ctx_proc, :server_proc => server_proc) do |server, port|
161+
sock = TCPSocket.new("127.0.0.1", port)
162+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 4096)
163+
ssl = OpenSSL::SSL::SSLSocket.new(sock)
164+
ssl.sync_close = true
165+
ssl.connect
166+
167+
# Send 500 small lines rapidly via write_nonblock
168+
line = "Z" * 200 + "\n"
169+
expected_total = 0
170+
500.times do
171+
written = 0
172+
while written < line.bytesize
173+
begin
174+
n = ssl.write_nonblock(line.byteslice(written, line.bytesize - written))
175+
written += n
176+
rescue IO::WaitWritable, OpenSSL::SSL::SSLErrorWaitWritable
177+
IO.select(nil, [ssl], nil, 5)
178+
retry
179+
end
180+
end
181+
expected_total += line.bytesize
182+
end
183+
ssl.write("DONE\n")
184+
185+
IO.select([ssl], nil, nil, 10)
186+
response = ssl.gets
187+
assert_not_nil response
188+
received = response[/TOTAL:(\d+)/, 1].to_i
189+
assert_equal expected_total, received, "Server received #{received} bytes but sent #{expected_total}"
190+
191+
ssl.close
192+
end
193+
end
194+
195+
# Detect the netWriteData.clear() bug by invoking the Java write() directly, bypassing syswriteImpl's `waitSelect`.
196+
#
197+
# Reproducer for #242 (jruby/jruby#8935).
198+
#
199+
# Strategy:
200+
# 1. Saturate the TCP send buffer (server doesn't read)
201+
# 2. Call write(ByteBuffer, false) directly via Java reflection
202+
# 3. Check netWriteData.remaining() — if > 0, data would be discarded by the next write() call's netWriteData.clear()
203+
def test_internal_write_nonblock_unflushed_data_detected
204+
require 'socket'
205+
206+
tcp_server = TCPServer.new("127.0.0.1", 0)
207+
port = tcp_server.local_address.ip_port
208+
209+
server_ctx = OpenSSL::SSL::SSLContext.new
210+
server_ctx.cert = @svr_cert
211+
server_ctx.key = @svr_key
212+
server_ctx.min_version = server_ctx.max_version = OpenSSL::SSL::TLS1_3_VERSION
213+
214+
ssl_server = OpenSSL::SSL::SSLServer.new(tcp_server, server_ctx)
215+
ssl_server.start_immediately = true
216+
217+
server_thread = Thread.new do
218+
Thread.current.report_on_exception = false
219+
begin
220+
ssl_conn = ssl_server.accept
221+
sleep 30 # Do NOT read — maximize backpressure
222+
ssl_conn.close rescue nil
223+
rescue
224+
end
225+
end
226+
227+
begin
228+
sock = TCPSocket.new("127.0.0.1", port)
229+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 4096)
230+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_RCVBUF, 4096)
231+
232+
ssl = OpenSSL::SSL::SSLSocket.new(sock)
233+
ssl.sync_close = true
234+
ssl.connect
235+
236+
java_cls = Java::OrgJrubyExtOpenssl::SSLSocket.java_class
237+
java_ssl = ssl.to_java(Java::OrgJrubyExtOpenssl::SSLSocket)
238+
239+
nwd_field = java_cls.declared_field("netWriteData")
240+
nwd_field.accessible = true
241+
# Get the write(ByteBuffer, boolean) method via reflection
242+
write_method = java_cls.declared_method("write", java.nio.ByteBuffer.java_class, Java::boolean)
243+
write_method.accessible = true
244+
245+
# Phase 1: fill the TCP send buffer via normal write_nonblock
246+
chunk = "H" * 16384
247+
100.times do
248+
begin
249+
ssl.write_nonblock(chunk)
250+
rescue IO::WaitWritable, OpenSSL::SSL::SSLErrorWaitWritable
251+
break
252+
rescue IOError, OpenSSL::SSL::SSLError
253+
break
254+
end
255+
end
256+
257+
# Phase 2: call write(src, false) directly — this bypasses
258+
# syswriteImpl's waitSelect and goes straight to the code path
259+
# that has the clear() bug.
260+
src = java.nio.ByteBuffer.wrap(("I" * 4096).to_java_bytes)
261+
begin
262+
write_method.invoke(java_ssl, src, false)
263+
rescue Java::JavaLangReflect::InvocationTargetException => e
264+
warn "write() threw: #{e.cause}" if $VERBOSE # Expected — write may throw due to the saturated buffer
265+
end
266+
267+
nwd = nwd_field.value(java_ssl)
268+
remaining = nwd.remaining
269+
270+
if remaining > 0
271+
# BUG CONFIRMED: there are unflushed encrypted bytes in netWriteData.
272+
# The next write() call will do netWriteData.clear(), discarding them.
273+
# This is exactly the data loss bug from issue #242.
274+
#
275+
# To prove actual data loss, we would call write() again — the clear() would discard remaining encrypted bytes,
276+
# corrupting the TLS record stream and eventually causing the server to see fewer bytes than the client sent.
277+
assert remaining > 0, "netWriteData has #{remaining} unflushed bytes — next write() would discard them via clear()"
278+
else
279+
omit "Could not produce unflushed netWriteData in loopback (remaining=#{remaining}); bug requires network latency"
280+
end
281+
ensure
282+
ssl.close rescue nil
283+
sock.close rescue nil
284+
tcp_server.close rescue nil
285+
server_thread.kill rescue nil
286+
server_thread.join(2) rescue nil
287+
end
288+
end if defined?(JRUBY_VERSION)
289+
end

0 commit comments

Comments
 (0)