Skip to content

Commit 9e73659

Browse files
committed
[fix] Harden upload retries on slow networks
Let adaptive upload timeouts control storage uploads and classify client-side transport resets so slow, unstable connections downscale instead of surfacing as server failures. Made-with: Cursor
1 parent 90808d2 commit 9e73659

11 files changed

Lines changed: 154 additions & 20 deletions

File tree

src/ByteSync.Client/Services/Communications/Transfers/Strategies/AzureBlobStorageUploadStrategy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public async Task<UploadFileResponse> UploadAsync(FileUploaderSlice slice, FileS
2121
try
2222
{
2323
var options = new BlobClientOptions();
24-
options.Retry.NetworkTimeout = TimeSpan.FromMinutes(1);
24+
options.Retry.NetworkTimeout = TimeSpan.FromMinutes(10);
2525

2626
slice.MemoryStream.Position = 0;
2727
var blob = new BlobClient(new Uri(storageLocation.Url), options);

src/ByteSync.Client/Services/Communications/Transfers/Strategies/CloudflareR2UploadStrategy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public async Task<UploadFileResponse> UploadAsync(FileUploaderSlice slice, FileS
2929
slice.MemoryStream.Position = 0;
3030

3131
using var httpClient = _httpClientFactory.CreateClient();
32-
httpClient.Timeout = TimeSpan.FromMinutes(1);
32+
httpClient.Timeout = Timeout.InfiniteTimeSpan;
3333
httpClient.DefaultRequestHeaders.ExpectContinue = false;
3434

3535
// Build ReadOnlyMemory without copying when possible; fallback to ToArray otherwise

src/ByteSync.Client/Services/Communications/Transfers/Strategies/UploadFailureClassifier.cs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
using System;
2+
using System.IO;
3+
using System.Net.Http;
4+
using System.Net.Sockets;
25
using System.Threading;
36
using ByteSync.Common.Business.Communications.Transfers;
47

@@ -12,12 +15,44 @@ public static UploadFileResponse Classify(Exception exception, CancellationToken
1215
{
1316
return UploadFileResponse.ClientCancellation(exception);
1417
}
15-
18+
1619
if (exception is OperationCanceledException)
1720
{
1821
return UploadFileResponse.ClientTimeout(exception);
1922
}
2023

24+
if (IsClientNetworkError(exception))
25+
{
26+
return UploadFileResponse.ClientNetworkError(exception);
27+
}
28+
2129
return UploadFileResponse.Failure(500, exception);
2230
}
31+
32+
private static bool IsClientNetworkError(Exception exception)
33+
{
34+
if (exception is not HttpRequestException and not IOException)
35+
{
36+
return false;
37+
}
38+
39+
var current = exception;
40+
while (current != null)
41+
{
42+
if (current is SocketException socketException)
43+
{
44+
return socketException.SocketErrorCode is SocketError.ConnectionReset
45+
or SocketError.ConnectionAborted
46+
or SocketError.TimedOut
47+
or SocketError.NetworkDown
48+
or SocketError.NetworkUnreachable
49+
or SocketError.HostDown
50+
or SocketError.HostUnreachable;
51+
}
52+
53+
current = current.InnerException;
54+
}
55+
56+
return false;
57+
}
2358
}

src/ByteSync.Client/Services/Communications/Transfers/Uploading/AdaptiveUploadController.cs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,9 @@ public void RecordUploadResult(UploadResult uploadResult)
9494
return;
9595
}
9696

97-
if (uploadResult.FailureKind == UploadFailureKind.ClientTimeout)
97+
if (uploadResult.FailureKind is UploadFailureKind.ClientTimeout or UploadFailureKind.ClientNetworkError)
9898
{
99-
HandleClientTimeout(uploadResult.FileId);
99+
HandleClientNetworkIssue(uploadResult.FileId, uploadResult.FailureKind);
100100
return;
101101
}
102102

@@ -162,26 +162,27 @@ private void EnqueueSample(TimeSpan elapsed, bool isSuccess, long actualBytes)
162162
}
163163
}
164164

165-
private void HandleClientTimeout(string? fileId)
165+
private void HandleClientNetworkIssue(string? fileId, UploadFailureKind failureKind)
166166
{
167167
_consecutiveClientTimeouts += 1;
168168
if (_consecutiveClientTimeouts < CLIENT_TIMEOUTS_BEFORE_DOWNSCALE)
169169
{
170170
_logger.LogDebug(
171-
"Adaptive: file {FileId} client timeout {TimeoutCount}/{Threshold}. Waiting before downscale",
171+
"Adaptive: file {FileId} client network issue {FailureKind} {TimeoutCount}/{Threshold}. Waiting before downscale",
172172
fileId ?? "-",
173+
failureKind,
173174
_consecutiveClientTimeouts,
174175
CLIENT_TIMEOUTS_BEFORE_DOWNSCALE);
175176

176177
return;
177178
}
178179

179180
_logger.LogInformation(
180-
"Adaptive: file {FileId} client timeout threshold reached ({TimeoutCount}). Downscaling upload settings",
181+
"Adaptive: file {FileId} client network issue threshold reached ({TimeoutCount}). Downscaling upload settings",
181182
fileId ?? "-",
182183
_consecutiveClientTimeouts);
183184
_consecutiveClientTimeouts = 0;
184-
Downscale(fileId, "client timeouts");
185+
Downscale(fileId, "client network issues");
185186
}
186187

187188
private bool HandleBandwidthReset(bool isSuccess, int? statusCode)

src/ByteSync.Client/Services/Communications/Transfers/Uploading/UploadAttemptTimeoutPolicy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace ByteSync.Services.Communications.Transfers.Uploading;
55
public static class UploadAttemptTimeoutPolicy
66
{
77
private const int AttemptTimeoutFloorSeconds = 60;
8-
private const int AttemptTimeoutCeilingSeconds = 120;
8+
private const int AttemptTimeoutCeilingSeconds = 180;
99
private const int SecondsPerMegabyteHeuristic = 3;
1010
private const int RetryGrowthSeconds = 15;
1111
private const int StaleChunkPenaltySeconds = 5;

src/ByteSync.Common/Business/Communications/Transfers/UploadFailureKind.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ public enum UploadFailureKind
66
ServerError = 1,
77
ClientCancellation = 2,
88
ClientTimeout = 3,
9+
ClientNetworkError = 4,
910
}

src/ByteSync.Common/Business/Communications/Transfers/UploadFileResponse.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,16 @@ public static UploadFileResponse ClientTimeout(Exception exception)
6666
FailureKind = UploadFailureKind.ClientTimeout,
6767
};
6868
}
69+
70+
public static UploadFileResponse ClientNetworkError(Exception exception)
71+
{
72+
return new UploadFileResponse
73+
{
74+
IsSuccess = false,
75+
StatusCode = 0,
76+
ErrorMessage = exception.Message,
77+
Exception = exception,
78+
FailureKind = UploadFailureKind.ClientNetworkError,
79+
};
80+
}
6981
}

tests/ByteSync.Client.UnitTests/Services/Communications/Transfers/Strategies/CloudflareR2UploadStrategyTests.cs

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,28 @@ private static FileUploaderSlice CreateSlice(int partNumber = 1, int sizeBytes =
2626
private static FileStorageLocation CreateLocation() => new(UploadUrl, StorageProvider.CloudflareR2);
2727

2828
private static (CloudflareR2UploadStrategy strategy, Mock<HttpMessageHandler> handler) CreateStrategy()
29+
{
30+
var (strategy, handler, _) = CreateStrategyWithCreatedClients();
31+
return (strategy, handler);
32+
}
33+
34+
private static (CloudflareR2UploadStrategy strategy, Mock<HttpMessageHandler> handler, List<HttpClient> createdClients)
35+
CreateStrategyWithCreatedClients()
2936
{
3037
var handler = new Mock<HttpMessageHandler>(MockBehavior.Strict);
38+
var createdClients = new List<HttpClient>();
3139
var factory = new Mock<IHttpClientFactory>();
3240
factory
3341
.Setup(f => f.CreateClient(It.IsAny<string>()))
34-
.Returns(() => new HttpClient(handler.Object, disposeHandler: false));
42+
.Returns(() =>
43+
{
44+
var httpClient = new HttpClient(handler.Object, disposeHandler: false);
45+
createdClients.Add(httpClient);
46+
return httpClient;
47+
});
3548

3649
var strategy = new CloudflareR2UploadStrategy(NullLogger<CloudflareR2UploadStrategy>.Instance, factory.Object);
37-
return (strategy, handler);
50+
return (strategy, handler, createdClients);
3851
}
3952

4053
private static void SetupHandler(Mock<HttpMessageHandler> handler, HttpResponseMessage response)
@@ -70,6 +83,18 @@ public async Task UploadAsync_On2xx_ShouldReturnSuccess()
7083
response.FailureKind.Should().Be(UploadFailureKind.None);
7184
}
7285

86+
[Test]
87+
public async Task UploadAsync_ShouldLetCallerTokenControlAttemptTimeout()
88+
{
89+
var (strategy, handler, createdClients) = CreateStrategyWithCreatedClients();
90+
SetupHandler(handler, new HttpResponseMessage(HttpStatusCode.OK));
91+
92+
await strategy.UploadAsync(CreateSlice(), CreateLocation(), CancellationToken.None);
93+
94+
createdClients.Should().ContainSingle();
95+
createdClients[0].Timeout.Should().Be(Timeout.InfiniteTimeSpan);
96+
}
97+
7398
[Test]
7499
public async Task UploadAsync_On500_ShouldReturnServerFailure_WithRealStatusCode()
75100
{

tests/ByteSync.Client.UnitTests/Services/Communications/Transfers/Strategies/UploadFailureClassifierTests.cs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
using ByteSync.Services.Communications.Transfers.Strategies;
33
using FluentAssertions;
44
using NUnit.Framework;
5+
using System.Net.Http;
6+
using System.Net.Sockets;
57

68
namespace ByteSync.Client.UnitTests.Services.Communications.Transfers.Strategies;
79

@@ -80,16 +82,32 @@ public void Classify_GenericException_ShouldReturnServerError500()
8082
}
8183

8284
[Test]
83-
public void Classify_HttpRequestException_ShouldReturnServerError500()
85+
public void Classify_HttpRequestExceptionWithoutSocketFailure_ShouldReturnServerError500()
8486
{
8587
using var cts = new CancellationTokenSource();
8688
var ex = new HttpRequestException("network issue");
87-
89+
8890
var response = UploadFailureClassifier.Classify(ex, cts.Token);
89-
91+
9092
response.IsSuccess.Should().BeFalse();
9193
response.StatusCode.Should().Be(500);
9294
response.FailureKind.Should().Be(UploadFailureKind.ServerError);
9395
response.Exception.Should().BeSameAs(ex);
9496
}
97+
98+
[Test]
99+
public void Classify_HttpRequestExceptionWithConnectionReset_ShouldReturnClientNetworkError()
100+
{
101+
using var cts = new CancellationTokenSource();
102+
var socketException = new SocketException((int)SocketError.ConnectionReset);
103+
var ioException = new IOException("transport closed", socketException);
104+
var ex = new HttpRequestException("copy failed", ioException);
105+
106+
var response = UploadFailureClassifier.Classify(ex, cts.Token);
107+
108+
response.IsSuccess.Should().BeFalse();
109+
response.StatusCode.Should().Be(0);
110+
response.FailureKind.Should().Be(UploadFailureKind.ClientNetworkError);
111+
response.Exception.Should().BeSameAs(ex);
112+
}
95113
}

tests/ByteSync.Client.UnitTests/Services/Communications/Transfers/Uploading/AdaptiveUploadControllerTests.cs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,22 @@ public void ClientTimeouts_DownscaleBelowInitialChunkSize_WhenAtMinParallelism()
199199
_controller.CurrentChunkSizeBytes.Should().BeLessThan(500 * 1024);
200200
_controller.CurrentChunkSizeBytes.Should().Be(375 * 1024);
201201
}
202-
202+
203+
[Test]
204+
public void ClientNetworkErrors_DownscaleBelowInitialChunkSize_WhenAtMinParallelism()
205+
{
206+
// Arrange
207+
_controller.CurrentParallelism.Should().Be(2);
208+
_controller.CurrentChunkSizeBytes.Should().Be(500 * 1024);
209+
210+
// Act
211+
FeedClientNetworkErrors(_controller, 2);
212+
213+
// Assert
214+
_controller.CurrentParallelism.Should().Be(2);
215+
_controller.CurrentChunkSizeBytes.Should().Be(375 * 1024);
216+
}
217+
203218
[Test]
204219
public void ClientTimeouts_ReduceParallelismFirst_WhenAboveMinParallelism()
205220
{
@@ -280,7 +295,21 @@ private static void FeedClientTimeouts(AdaptiveUploadController controller, int
280295
failureKind: UploadFailureKind.ClientTimeout);
281296
}
282297
}
283-
298+
299+
private static void FeedClientNetworkErrors(AdaptiveUploadController controller, int count)
300+
{
301+
for (var i = 0; i < count; i++)
302+
{
303+
RecordUploadResult(
304+
controller,
305+
TimeSpan.FromSeconds(90),
306+
isSuccess: false,
307+
partNumber: i + 1,
308+
statusCode: 0,
309+
failureKind: UploadFailureKind.ClientNetworkError);
310+
}
311+
}
312+
284313
private static void RecordUploadResult(
285314
AdaptiveUploadController controller,
286315
TimeSpan elapsed,

0 commit comments

Comments
 (0)