Skip to content

Commit aa1b167

Browse files
committed
Fix SFTP server hang on WS_WANT_WRITE with non-blocking sockets
When wolfSSH_SFTP_buffer_send() called wolfSSH_stream_send(), the data would be consumed into the SSH output buffer even if the underlying socket returned EWOULDBLOCK/EAGAIN. SendChannelData() returns the positive dataSz on WS_WANT_WRITE, causing the SFTP layer to advance its buffer index as if the data was sent. The SSH output buffer still had pending data that was never flushed, leading to an indefinite hang. Fix: At the start of wolfSSH_SFTP_buffer_send(), check if there's pending data in ssh->outputBuffer from a previous WS_WANT_WRITE. If so, attempt to flush it first and return WS_WANT_WRITE if the flush fails. This ensures the caller retries until all pending data is sent. Also expose WS_SFTP_BUFFER and wolfSSH_SFTP_buffer_send() as WOLFSSH_LOCAL for unit testing, and add regression test that verifies the fix catches the bug. Fixes ZD 21157
1 parent d32dd2b commit aa1b167

File tree

4 files changed

+183
-15
lines changed

4 files changed

+183
-15
lines changed

.github/workflows/paramiko-sftp-test.yml

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,45 +132,97 @@ jobs:
132132
import os
133133
import time
134134
import sys
135-
135+
import hashlib
136+
import signal
137+
138+
# Timeout handler for detecting hangs
139+
class TimeoutError(Exception):
140+
pass
141+
142+
def timeout_handler(signum, frame):
143+
raise TimeoutError("SFTP operation timed out - possible hang detected!")
144+
145+
def get_file_hash(filepath):
146+
"""Calculate MD5 hash of a file."""
147+
hash_md5 = hashlib.md5()
148+
with open(filepath, "rb") as f:
149+
for chunk in iter(lambda: f.read(8192), b""):
150+
hash_md5.update(chunk)
151+
return hash_md5.hexdigest()
152+
136153
def run_sftp_test():
137154
# Create SSH client
138155
ssh = paramiko.SSHClient()
139156
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
140-
157+
141158
# Connect to server using password authentication with testuser
142159
print("Connecting to wolfSSHd server...")
143160
try:
144161
ssh.connect('127.0.0.1', port=22222, username='testuser', password='testpassword')
145162
except Exception as e:
146163
print(f"Connection error: {e}")
147164
raise
148-
165+
149166
# Open SFTP session
150167
print("Opening SFTP session...")
151168
sftp = ssh.open_sftp()
152-
169+
153170
# Upload test
154171
print("Uploading 20MB test file...")
155172
start_time = time.time()
156173
sftp.put('/tmp/sftp_upload/test_upload.dat', '/tmp/test_upload.dat')
157174
upload_time = time.time() - start_time
158175
print(f"Upload completed in {upload_time:.2f} seconds")
159-
176+
160177
# Download test
161178
print("Downloading 20MB test file...")
162179
start_time = time.time()
163180
sftp.get('/tmp/test_upload.dat', '/tmp/sftp_download/test_download.dat')
164181
download_time = time.time() - start_time
165182
print(f"Download completed in {download_time:.2f} seconds")
166-
183+
184+
# Stress test: Repeated GET operations with prefetch
185+
# This tests the WS_WANT_WRITE handling during repeated transfers
186+
# which was the original bug trigger (SFTP hang on non-blocking sockets)
187+
print("\n=== Starting repeated GET stress test (prefetch enabled) ===")
188+
num_iterations = 10
189+
timeout_seconds = 30 # Per-operation timeout to detect hangs
190+
191+
for i in range(num_iterations):
192+
signal.signal(signal.SIGALRM, timeout_handler)
193+
signal.alarm(timeout_seconds)
194+
try:
195+
download_path = f'/tmp/sftp_download/stress_test_{i}.dat'
196+
start_time = time.time()
197+
# Paramiko uses prefetch by default for get()
198+
sftp.get('/tmp/test_upload.dat', download_path)
199+
elapsed = time.time() - start_time
200+
signal.alarm(0) # Cancel alarm
201+
202+
# Verify integrity
203+
orig_hash = get_file_hash('/tmp/sftp_upload/test_upload.dat')
204+
down_hash = get_file_hash(download_path)
205+
if orig_hash != down_hash:
206+
print(f" Iteration {i+1}: FAILED - hash mismatch!")
207+
return False
208+
209+
print(f" Iteration {i+1}/{num_iterations}: OK ({elapsed:.2f}s)")
210+
os.remove(download_path) # Cleanup
211+
212+
except TimeoutError as e:
213+
print(f" Iteration {i+1}: FAILED - {e}")
214+
print(" This may indicate the WS_WANT_WRITE hang bug!")
215+
return False
216+
217+
print(f"=== Stress test completed: {num_iterations} iterations OK ===\n")
218+
167219
# Close connections
168220
sftp.close()
169221
ssh.close()
170-
222+
171223
print("SFTP session closed")
172224
return True
173-
225+
174226
if __name__ == "__main__":
175227
try:
176228
success = run_sftp_test()

src/wolfsftp.c

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,8 @@ enum WS_SFTP_LSTAT_STATE_ID {
145145
STATE_LSTAT_CLEANUP
146146
};
147147

148-
/* This structure is to help with nonblocking and keeping track of state.
148+
/* WS_SFTP_BUFFER is defined in wolfsftp.h for use with nonblocking state.
149149
* If adding any read/writes use the wolfSSH_SFTP_buffer_read/send functions */
150-
typedef struct WS_SFTP_BUFFER {
151-
byte* data;
152-
word32 sz;
153-
word32 idx;
154-
} WS_SFTP_BUFFER;
155150

156151
typedef struct WS_SFTP_CHMOD_STATE {
157152
enum WS_SFTP_CHMOD_STATE_ID state;
@@ -513,7 +508,7 @@ static void wolfSSH_SFTP_buffer_rewind(WS_SFTP_BUFFER* buffer)
513508

514509
/* try to send the rest of the buffer (buffer.sz - buffer.idx)
515510
* increments idx with amount sent */
516-
static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer)
511+
WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer)
517512
{
518513
int ret = WS_SUCCESS;
519514
int err;
@@ -526,6 +521,19 @@ static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer)
526521
return WS_BUFFER_E;
527522
}
528523

524+
/* Flush any pending data in SSH output buffer first.
525+
* Handles case where previous send returned WS_WANT_WRITE
526+
* and data is still buffered at the SSH layer. */
527+
if (ssh->outputBuffer.length > ssh->outputBuffer.idx) {
528+
ret = wolfSSH_SendPacket(ssh);
529+
if (ret == WS_WANT_WRITE) {
530+
return ret;
531+
}
532+
if (ret < 0) {
533+
return ret;
534+
}
535+
}
536+
529537
/* Call wolfSSH worker if rekeying or adjusting window size */
530538
err = wolfSSH_get_error(ssh);
531539
if (err == WS_WINDOW_FULL || err == WS_REKEYING) {
@@ -1603,6 +1611,14 @@ int wolfSSH_SFTP_read(WOLFSSH* ssh)
16031611
ssh->error = WS_WANT_WRITE;
16041612
return WS_FATAL_ERROR;
16051613
}
1614+
/* Check if SSH layer still has pending data from WS_WANT_WRITE.
1615+
* Even if SFTP buffer is fully consumed, the data may still be
1616+
* sitting in the SSH output buffer waiting to be sent. */
1617+
if (ssh->error == WS_WANT_WRITE ||
1618+
ssh->outputBuffer.length > ssh->outputBuffer.idx) {
1619+
ssh->error = WS_WANT_WRITE;
1620+
return WS_FATAL_ERROR;
1621+
}
16061622
ret = WS_SUCCESS;
16071623
state->toSend = 0;
16081624
}

tests/regress.c

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@
4040
#include <wolfssh/port.h>
4141
#include <wolfssh/ssh.h>
4242
#include <wolfssh/internal.h>
43+
#ifdef WOLFSSH_SFTP
44+
#include <wolfssh/wolfsftp.h>
45+
#endif
4346
#include "apps/wolfssh/common.h"
4447

4548
#ifndef WOLFSSH_NO_ABORT
@@ -482,6 +485,89 @@ static void TestWorkerReadsWhenSendWouldBlock(void)
482485
#endif
483486

484487

488+
#ifdef WOLFSSH_SFTP
489+
/* Test that wolfSSH_SFTP_buffer_send() properly handles WS_WANT_WRITE when
490+
* SSH output buffer has pending data. This is a regression test for
491+
* the SFTP hang issue with non-blocking sockets.
492+
*
493+
* The fix checks for pending data in ssh->outputBuffer at the start of
494+
* wolfSSH_SFTP_buffer_send() and returns WS_WANT_WRITE if the flush fails. */
495+
static int sftpWantWriteCallCount = 0;
496+
497+
static int SftpWantWriteSendCb(WOLFSSH* ssh, void* buf, word32 sz, void* ctx)
498+
{
499+
(void)ssh; (void)buf; (void)ctx;
500+
sftpWantWriteCallCount++;
501+
/* First call returns WANT_WRITE, subsequent calls succeed */
502+
if (sftpWantWriteCallCount == 1) {
503+
return WS_CBIO_ERR_WANT_WRITE;
504+
}
505+
return (int)sz;
506+
}
507+
508+
static int SftpDummyRecv(WOLFSSH* ssh, void* buf, word32 sz, void* ctx)
509+
{
510+
(void)ssh; (void)buf; (void)sz; (void)ctx;
511+
return WS_CBIO_ERR_WANT_READ;
512+
}
513+
514+
static void TestSftpBufferSendPendingOutput(void)
515+
{
516+
WOLFSSH_CTX* ctx;
517+
WOLFSSH* ssh;
518+
WS_SFTP_BUFFER sftpBuf;
519+
byte testData[16];
520+
int ret;
521+
522+
ctx = wolfSSH_CTX_new(WOLFSSH_ENDPOINT_SERVER, NULL);
523+
AssertNotNull(ctx);
524+
525+
wolfSSH_SetIOSend(ctx, SftpWantWriteSendCb);
526+
wolfSSH_SetIORecv(ctx, SftpDummyRecv);
527+
528+
ssh = wolfSSH_new(ctx);
529+
AssertNotNull(ssh);
530+
531+
/* Set up SFTP buffer with some data to send */
532+
WMEMSET(testData, 0x42, sizeof(testData));
533+
WMEMSET(&sftpBuf, 0, sizeof(sftpBuf));
534+
sftpBuf.data = testData;
535+
sftpBuf.sz = sizeof(testData);
536+
sftpBuf.idx = 0;
537+
538+
/* Simulate pending data in SSH output buffer (as if previous send
539+
* returned WS_WANT_WRITE and data was buffered).
540+
* Note: outputBuffer is initialized by BufferInit() with bufferSz set
541+
* to at least STATIC_BUFFER_LEN (16 bytes), so we use a smaller value. */
542+
ssh->outputBuffer.length = 8; /* 8 bytes pending */
543+
ssh->outputBuffer.idx = 0; /* none sent yet */
544+
545+
sftpWantWriteCallCount = 0;
546+
547+
/* Call wolfSSH_SFTP_buffer_send - should return WS_WANT_WRITE because
548+
* the fix detects pending data in outputBuffer and tries to flush it,
549+
* which fails with WS_WANT_WRITE from our callback.
550+
*
551+
* Before the fix, the function would ignore the pending SSH output buffer
552+
* data and proceed to send new SFTP data, leading to a hang because the
553+
* pending data was never flushed. */
554+
ret = wolfSSH_SFTP_buffer_send(ssh, &sftpBuf);
555+
AssertIntEQ(ret, WS_WANT_WRITE);
556+
557+
/* Verify the SFTP buffer was NOT consumed (idx should still be 0).
558+
* This is critical - the SFTP layer must not advance its buffer index
559+
* until the SSH output buffer is flushed. */
560+
AssertIntEQ(sftpBuf.idx, 0);
561+
562+
/* Verify the SSH output buffer still has pending data */
563+
AssertTrue(ssh->outputBuffer.length > ssh->outputBuffer.idx);
564+
565+
wolfSSH_free(ssh);
566+
wolfSSH_CTX_free(ctx);
567+
}
568+
#endif /* WOLFSSH_SFTP */
569+
570+
485571
int main(int argc, char** argv)
486572
{
487573
WOLFSSH_CTX* ctx;
@@ -515,6 +601,10 @@ int main(int argc, char** argv)
515601
TestWorkerReadsWhenSendWouldBlock();
516602
#endif
517603

604+
#ifdef WOLFSSH_SFTP
605+
TestSftpBufferSendPendingOutput();
606+
#endif
607+
518608
/* TODO: add app-level regressions that simulate stdin EOF/password
519609
* prompts and mid-session socket closes once the test harness can
520610
* drive the wolfssh client without real sockets/tty. */

wolfssh/wolfsftp.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,13 @@ struct WS_SFTPNAME {
173173
#define WOLFSSH_MAX_SFTP_RECV 32768
174174
#endif
175175

176+
/* SFTP buffer for nonblocking state tracking */
177+
typedef struct WS_SFTP_BUFFER {
178+
byte* data;
179+
word32 sz;
180+
word32 idx;
181+
} WS_SFTP_BUFFER;
182+
176183
/* functions for establishing a connection */
177184
WOLFSSH_API int wolfSSH_SFTP_accept(WOLFSSH* ssh);
178185
WOLFSSH_API int wolfSSH_SFTP_connect(WOLFSSH* ssh);
@@ -282,6 +289,9 @@ WOLFSSL_LOCAL int SFTP_RemoveHandleNode(WOLFSSH* ssh, byte* handle,
282289

283290
WOLFSSH_LOCAL void wolfSSH_SFTP_ShowSizes(void);
284291

292+
/* SFTP buffer send - exposed for testing */
293+
WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer);
294+
285295
#ifdef __cplusplus
286296
}
287297
#endif

0 commit comments

Comments
 (0)