Skip to content

Commit eafe09e

Browse files
fix(audience-sdk): treat HTTP 429 as retryable with Retry-After backoff
Fixes a real SDK bug surfaced by the SDK-148 live-fire test suite: every 4xx — including 429 (Too Many Requests) — was treated as a permanent "server rejected your data, retry won't help" failure. Per RFC 6585, 429 is the canonical retryable 4xx and must honor the Retry-After header.

Changes:
- HttpTransport: split 429 out of the 4xx delete-and-error branch. Keep the batch on disk, honor Retry-After (delta-seconds or HTTP-date) if present, otherwise apply the existing exponential schedule (5/10/20/40/60s). Do NOT fire onError — the next flush tick retries automatically once the backoff window expires. Persistent rate-limiting manifests as a growing on-disk queue, the correct studio-actionable signal.
- ImmutableAudience consent sync: wrap the PUT in a 4-attempt retry loop with 1s/2s/4s backoff between attempts (or Retry-After if the server supplies one). ConsentSyncFailed only fires after the budget exhausts.
- HttpRetry helper class for shared Retry-After parsing across batch and consent-sync paths.

Tests:
- HttpTransport: 429 keeps batch + sets backoff + no onError; Retry-After delta-seconds drives NextAttemptAt; HTTP-date variant engages backoff window; past Retry-After falls through to exponential; 429-then-200 delivers the batch and clears backoff.
- ConsentSync: 429-then-2xx is invisible to onError; 429×4 surfaces ConsentSyncFailed after the full retry budget.
1 parent 40dbaa6 commit eafe09e

6 files changed

Lines changed: 281 additions & 12 deletions

File tree

src/Packages/Audience/Runtime/ImmutableAudience.cs

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -565,17 +565,33 @@ private static void SyncConsentToBackend(AudienceConfig config, ConsentLevel lev
565565

566566
Task.Run(async () =>
567567
{
568+
// 429 retried up to 4 attempts (1s/2s/4s or Retry-After).
569+
// Other non-2xx fail fast.
570+
const int maxAttempts = 4;
571+
var attempt = 0;
568572
try
569573
{
570-
using var request = new HttpRequestMessage(HttpMethod.Put, url);
571-
request.Headers.Add(Constants.PublishableKeyHeader, publishableKey);
572-
request.Content = new StringContent(body, System.Text.Encoding.UTF8, "application/json");
573-
using var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
574-
575-
if (!response.IsSuccessStatusCode)
574+
while (true)
576575
{
576+
attempt++;
577+
using var request = new HttpRequestMessage(HttpMethod.Put, url);
578+
request.Headers.Add(Constants.PublishableKeyHeader, publishableKey);
579+
request.Content = new StringContent(body, System.Text.Encoding.UTF8, "application/json");
580+
using var response = await client.SendAsync(request, cancellationToken).ConfigureAwait(false);
581+
582+
if (response.IsSuccessStatusCode) return;
583+
584+
if ((int)response.StatusCode == 429 && attempt < maxAttempts)
585+
{
586+
var delay = HttpRetry.ParseRetryAfter(response)
587+
?? TimeSpan.FromMilliseconds(1_000 * (1 << (attempt - 1)));
588+
await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
589+
continue;
590+
}
591+
577592
NotifyErrorCallback(onError, AudienceErrorCode.ConsentSyncFailed,
578593
$"Consent sync failed with status {(int)response.StatusCode}");
594+
return;
579595
}
580596
}
581597
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#nullable enable
2+
3+
using System;
4+
using System.Net.Http;
5+
6+
namespace Immutable.Audience
{
    // Shared Retry-After parsing for code paths that retry on rate-limited responses.
    internal static class HttpRetry
    {
        // Translates the response's Retry-After header into a wait duration.
        //
        // response: the HTTP response whose Retry-After header is inspected.
        // utcNow:   reference instant used to convert an HTTP-date into a delay.
        //           Optional; when omitted the current DateTimeOffset.UtcNow is
        //           used. Supplying a fixed value makes the HTTP-date form
        //           deterministic (e.g. for tests or an injected clock).
        //
        // Returns:
        //   - the delta-seconds value unchanged when the header carries one
        //     (a delta of 0 is returned as TimeSpan.Zero, i.e. "retry now");
        //   - the time remaining until the HTTP-date when the header carries a
        //     date strictly after the reference instant;
        //   - null when the header is absent, or the HTTP-date is not in the
        //     future, so callers fall through to their default backoff.
        internal static TimeSpan? ParseRetryAfter(HttpResponseMessage response, DateTimeOffset? utcNow = null)
        {
            var retryAfter = response.Headers.RetryAfter;
            if (retryAfter == null) return null;

            // Delta-seconds form: already a duration, no clock involved.
            if (retryAfter.Delta.HasValue) return retryAfter.Delta.Value;

            // HTTP-date form: convert the absolute instant into a relative delay.
            if (retryAfter.Date.HasValue)
            {
                var reference = utcNow ?? DateTimeOffset.UtcNow;
                var remaining = retryAfter.Date.Value - reference;
                return remaining > TimeSpan.Zero ? remaining : (TimeSpan?)null;
            }

            return null;
        }
    }
}

src/Packages/Audience/Runtime/Transport/HttpRetry.cs.meta

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/Packages/Audience/Runtime/Transport/HttpTransport.cs

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,26 @@ internal async Task<bool> SendBatchAsync(CancellationToken ct = default)
114114
$"Batch partially rejected: {rejected} of {batch.Count} events dropped");
115115
}
116116
}
117+
else if (statusCode == 429)
118+
{
119+
// 429 is retryable (RFC 6585). Keep the batch, honor Retry-After
120+
// if present else use the existing 5xx backoff schedule. No
121+
// onError — next flush tick retries; persistent rate-limits
122+
// surface as a growing on-disk queue.
123+
var retryAfter = HttpRetry.ParseRetryAfter(response);
124+
if (retryAfter.HasValue)
125+
SetBackoffUntil(_getUtcNow() + retryAfter.Value);
126+
else
127+
RecordFailure();
128+
}
117129
else if (statusCode >= 400 && statusCode < 500)
118130
{
119-
// 4xx: server rejected the payload. Drop it (retry won't help) and
120-
// reset backoff — server is healthy, our data was the problem.
121-
// Capture the response body so the caller's OnError surfaces
122-
// the server's reason string ("unknown publishable key",
123-
// "missing field X", etc.) rather than a bare status code.
131+
// 4xx (non-429): server rejected the payload. Drop it (retry
132+
// won't help) and reset backoff — server is healthy, our data
133+
// was the problem. Capture the response body so the caller's
134+
// OnError surfaces the server's reason string ("unknown
135+
// publishable key", "missing field X", etc.) rather than a
136+
// bare status code.
124137
var rejectionBody = await ReadBodyForErrorAsync(response).ConfigureAwait(false);
125138
_store.Delete(batch);
126139
ResetBackoff();
@@ -205,6 +218,16 @@ private void RecordFailure()
205218
}
206219
}
207220

221+
// Pins the next send attempt to an explicit instant (used when the server
// supplies Retry-After). The instant is stored as given; BackoffMsLocked is
// not consulted here. The call is still counted as a consecutive failure.
private void SetBackoffUntil(DateTime nextAt)
{
    lock (_backoffLock)
    {
        // Both writes happen under the same lock, so their order is not observable.
        _nextAttemptAt = nextAt;
        _consecutiveFailures += 1;
    }
}
230+
208231
private void ResetBackoff()
209232
{
210233
lock (_backoffLock)

src/Packages/Audience/Tests/Runtime/ConsentSyncTests.cs

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,70 @@ public void SetConsent_PutFailure_InvokesOnErrorWithConsentSyncFailed()
9898
StringAssert.Contains("500", captured.Message);
9999
}
100100

101+
[Test]
public void SetConsent_429ThenSuccess_DoesNotFireConsentSyncFailed()
{
    // Arrange: the first PUT is rate-limited, the second succeeds.
    // Retry-After: 0 makes the retry immediate.
    var rateLimitedOnce = new CapturingHandler();
    rateLimitedOnce.StatusSequence = new[] { (HttpStatusCode)429, HttpStatusCode.NoContent };
    rateLimitedOnce.RetryAfterSeconds = 0;

    var syncFailedSignal = new ManualResetEventSlim(false);
    AudienceError syncFailure = null;

    var audienceConfig = MakeConfig(rateLimitedOnce, ConsentLevel.Anonymous);
    audienceConfig.OnError = error =>
    {
        // Only ConsentSyncFailed is of interest; ignore any other error code.
        if (error.Code != AudienceErrorCode.ConsentSyncFailed) return;

        syncFailure = error;
        syncFailedSignal.Set();
    };
    ImmutableAudience.Init(audienceConfig);

    // Act
    ImmutableAudience.SetConsent(ConsentLevel.Full);

    // Assert: give both attempts time to complete; the signal must never fire.
    var signalled = syncFailedSignal.Wait(TimeSpan.FromSeconds(3));
    Assert.IsFalse(signalled,
        "transient 429 followed by 2xx must not surface ConsentSyncFailed");
    Assert.IsNull(syncFailure);
    Assert.GreaterOrEqual(rateLimitedOnce.PutCount, 2,
        "429 must trigger at least one retry");
}
132+
133+
[Test]
public void SetConsent_429Repeated_FiresConsentSyncFailedAfterRetries()
{
    // Arrange: every PUT is answered with 429. Retry-After: 0 replaces the
    // 1s/2s/4s production waits so the whole retry budget runs quickly.
    var alwaysRateLimited = new CapturingHandler();
    alwaysRateLimited.Status = (HttpStatusCode)429;
    alwaysRateLimited.RetryAfterSeconds = 0;

    var syncFailedSignal = new ManualResetEventSlim(false);
    AudienceError syncFailure = null;

    var audienceConfig = MakeConfig(alwaysRateLimited, ConsentLevel.Anonymous);
    audienceConfig.OnError = error =>
    {
        // Only ConsentSyncFailed is of interest; ignore any other error code.
        if (error.Code != AudienceErrorCode.ConsentSyncFailed) return;

        syncFailure = error;
        syncFailedSignal.Set();
    };
    ImmutableAudience.Init(audienceConfig);

    // Act
    ImmutableAudience.SetConsent(ConsentLevel.Full);

    // Assert: the failure surfaces, mentions the status, and all attempts were used.
    var signalled = syncFailedSignal.Wait(TimeSpan.FromSeconds(5));
    Assert.IsTrue(signalled,
        "exhausted 429 retries must surface ConsentSyncFailed");
    StringAssert.Contains("429", syncFailure.Message);
    Assert.AreEqual(4, alwaysRateLimited.PutCount, "must have made the full 4 attempts");
}
164+
101165
private AudienceConfig MakeConfig(CapturingHandler handler, ConsentLevel consent) =>
102166
new AudienceConfig
103167
{
@@ -128,11 +192,20 @@ private class CapturingHandler : HttpMessageHandler
128192
internal CapturedRequest LastPut;
129193
internal HttpStatusCode Status { get; set; } = HttpStatusCode.NoContent;
130194

195+
// One status per call; falls back to Status once exhausted.
196+
internal HttpStatusCode[] StatusSequence { get; set; }
197+
198+
// Adds Retry-After: <seconds> to 429 responses (0 = retry now).
199+
internal int? RetryAfterSeconds { get; set; }
200+
201+
internal int PutCount { get; private set; }
202+
131203
protected override async Task<HttpResponseMessage> SendAsync(
132204
HttpRequestMessage request, CancellationToken ct)
133205
{
134206
if (request.Method == HttpMethod.Put)
135207
{
208+
PutCount++;
136209
LastPut = new CapturedRequest
137210
{
138211
Url = request.RequestUri!.ToString(),
@@ -142,7 +215,17 @@ protected override async Task<HttpResponseMessage> SendAsync(
142215
};
143216
PutReceived.Set();
144217
}
145-
return new HttpResponseMessage(Status);
218+
219+
var status = StatusSequence != null && PutCount - 1 < StatusSequence.Length
220+
? StatusSequence[PutCount - 1]
221+
: Status;
222+
223+
var response = new HttpResponseMessage(status);
224+
if ((int)status == 429 && RetryAfterSeconds.HasValue)
225+
{
226+
response.Headers.Add("Retry-After", RetryAfterSeconds.Value.ToString());
227+
}
228+
return response;
146229
}
147230
}
148231
}

src/Packages/Audience/Tests/Runtime/Transport/HttpTransportTests.cs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,118 @@ public async Task SendBatchAsync_4xx_DeletesFilesAndResetsBackoff()
203203
Assert.AreEqual(AudienceErrorCode.ValidationRejected, reportedError!.Code);
204204
}
205205

206+
[Test]
public async Task SendBatchAsync_429_NoRetryAfter_KeepsFilesAndUsesExpoBackoff_NoError()
{
    // Arrange: one queued event and a server that answers 429 without Retry-After.
    _store.Write("{\"type\":\"track\"}");
    AudienceError? lastError = null;
    var rateLimited = new MockHandler((HttpStatusCode)429, "");

    using (var transport = new HttpTransport(_store, "pk_imapik-test-key1",
        onError: error => lastError = error, handler: rateLimited, getUtcNow: _getUtcNow))
    {
        // Act
        await transport.SendBatchAsync();

        // Assert: batch retained, first backoff step engaged, no error surfaced.
        Assert.AreEqual(1, _store.Count(), "429 must keep files for retry");
        Assert.IsTrue(transport.IsInBackoffWindow);
        Assert.AreEqual(5_000, transport.BackoffMs);
        Assert.IsNull(lastError, "429 is transient — must not fire onError");
    }
}
223+
224+
[Test]
public async Task SendBatchAsync_429_RetryAfterDeltaSeconds_OverridesExpoBackoff()
{
    // Arrange: one queued event and a 429 carrying "Retry-After: 12".
    _store.Write("{\"type\":\"track\"}");
    var rateLimited = new MockHandler(() =>
    {
        var reply = new HttpResponseMessage((HttpStatusCode)429);
        reply.Headers.Add("Retry-After", "12");
        return reply;
    });

    using (var transport = new HttpTransport(_store, "pk_imapik-test-key1",
        handler: rateLimited, getUtcNow: _getUtcNow))
    {
        // Act
        await transport.SendBatchAsync();

        // Assert: the next attempt is scheduled exactly 12 s after the test clock.
        Assert.IsTrue(transport.IsInBackoffWindow);
        Assert.AreEqual(_utcNow.AddSeconds(12), transport.NextAttemptAt);
    }
}
243+
244+
[Test]
public async Task SendBatchAsync_429_RetryAfterHttpDate_OverridesExpoBackoff()
{
    // ParseRetryAfter measures an HTTP-date against DateTimeOffset.UtcNow rather
    // than the test clock, so the exact deadline cannot be asserted here. This
    // test only checks that a future date opens the backoff window; the
    // delta-seconds test covers the exact arithmetic.
    _store.Write("{\"type\":\"track\"}");
    var rateLimited = new MockHandler(() =>
    {
        var reply = new HttpResponseMessage((HttpStatusCode)429);
        reply.Headers.Add("Retry-After", DateTimeOffset.UtcNow.AddSeconds(20).ToString("R"));
        return reply;
    });

    using (var transport = new HttpTransport(_store, "pk_imapik-test-key1",
        handler: rateLimited, getUtcNow: _getUtcNow))
    {
        // Act
        await transport.SendBatchAsync();

        // Assert: batch retained and the window is open.
        Assert.AreEqual(1, _store.Count());
        Assert.IsTrue(transport.IsInBackoffWindow);
    }
}
266+
267+
[Test]
public async Task SendBatchAsync_429_PastRetryAfterDate_FallsBackToExpoBackoff()
{
    // A Retry-After date already in the past (clock skew or a server bug) must
    // not leave IsInBackoffWindow false, which would allow an instant retry.
    _store.Write("{\"type\":\"track\"}");
    var rateLimited = new MockHandler(() =>
    {
        var reply = new HttpResponseMessage((HttpStatusCode)429);
        reply.Headers.Add("Retry-After", DateTimeOffset.UtcNow.AddSeconds(-30).ToString("R"));
        return reply;
    });

    using (var transport = new HttpTransport(_store, "pk_imapik-test-key1",
        handler: rateLimited, getUtcNow: _getUtcNow))
    {
        // Act
        await transport.SendBatchAsync();

        // Assert: the first exponential step is used instead of the stale date.
        Assert.AreEqual(5_000, transport.BackoffMs);
        Assert.IsTrue(transport.IsInBackoffWindow);
    }
}
288+
289+
[Test]
public async Task SendBatchAsync_429ThenSuccess_DeliversBatchAndClearsBackoff()
{
    // Arrange: the server rate-limits the first request and accepts every later one.
    _store.Write("{\"type\":\"track\"}");
    var requestsSeen = 0;
    var rateLimitedOnce = new MockHandler(() =>
    {
        requestsSeen++;
        if (requestsSeen == 1)
        {
            return new HttpResponseMessage((HttpStatusCode)429);
        }

        var accepted = new HttpResponseMessage(HttpStatusCode.OK);
        accepted.Content = new StringContent("{\"accepted\":1,\"rejected\":0}");
        return accepted;
    });
    AudienceError? lastError = null;

    using (var transport = new HttpTransport(_store, "pk_imapik-test-key1",
        onError: error => lastError = error, handler: rateLimitedOnce, getUtcNow: _getUtcNow))
    {
        // First send: rate-limited, so the batch stays and backoff begins.
        await transport.SendBatchAsync();
        Assert.AreEqual(1, _store.Count(), "429 keeps the batch");
        Assert.AreEqual(5_000, transport.BackoffMs);

        // Move the test clock just past the 5 s window, then retry.
        Advance(5_001);
        await transport.SendBatchAsync();
        Assert.AreEqual(0, _store.Count(), "200 on retry deletes the batch");
        Assert.AreEqual(0, transport.BackoffMs, "backoff resets after success");
        Assert.IsNull(lastError, "neither 429 nor success must fire onError");
    }
}
317+
206318
[Test]
207319
public async Task SendBatchAsync_200_WithRejected_DeletesFilesAndSurfacesValidationRejected()
208320
{

0 commit comments

Comments
 (0)