Skip to content

Commit ccfae1c

Browse files
authored
refactor: Livechat webhook retry & timeout logic (RocketChat#36793)
1 parent 078362e commit ccfae1c

3 files changed

Lines changed: 326 additions & 13 deletions

File tree

apps/meteor/app/livechat/server/hooks/sendToCRM.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type { IOmnichannelRoom, IOmnichannelSystemMessage, IMessage } from '@rocket.chat/core-typings';
22
import { isEditedMessage, isOmnichannelRoom } from '@rocket.chat/core-typings';
33
import { LivechatRooms, Messages } from '@rocket.chat/models';
4+
import type { Response } from '@rocket.chat/server-fetch';
45

56
import { callbacks } from '../../../../lib/callbacks';
67
import { settings } from '../../../settings/server';
@@ -140,12 +141,15 @@ export async function sendToCRM(
140141
const additionalData = getAdditionalFieldsByType(type, room);
141142
const responseData = Object.assign(postData, additionalData);
142143

143-
const response = await sendRequest(responseData);
144-
145-
if (response) {
144+
// do not wait for the request to be answered
145+
// this will avoid blocking the process of saving the message
146+
void sendRequest(responseData, 5, async (response: Response) => {
147+
if (!response) {
148+
return;
149+
}
146150
const responseData = await response.text();
147151
await LivechatRooms.saveCRMDataByRoomId(room._id, responseData);
148-
}
152+
});
149153

150154
return room;
151155
}
Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
import { serverFetch as fetch } from '@rocket.chat/server-fetch';
2+
import type { Response } from '@rocket.chat/server-fetch';
23

34
import { webhooksLogger } from './logger';
45
import { metrics } from '../../../metrics/server';
56
import { settings } from '../../../settings/server';
67

8+
const isRetryable = (status: number): boolean => status >= 500 || status === 429;
9+
710
export async function sendRequest(
811
postData: {
912
type: string;
1013
[key: string]: any;
1114
},
12-
attempts = 10,
15+
attempts = 5,
16+
cb?: (response: Response) => Promise<void>,
1317
) {
14-
if (!attempts) {
15-
webhooksLogger.error({ msg: 'Omnichannel webhook call failed. Max attempts reached' });
16-
return;
17-
}
1818
const timeout = settings.get<number>('Livechat_http_timeout');
1919
const secretToken = settings.get<string>('Livechat_secret_token');
2020
const webhookUrl = settings.get<string>('Livechat_webhookUrl');
@@ -31,18 +31,35 @@ export async function sendRequest(
3131

3232
if (result.status === 200) {
3333
metrics.totalLivechatWebhooksSuccess.inc();
34+
await cb?.(result);
3435
return result;
3536
}
3637

38+
if (!isRetryable(result.status)) {
39+
webhooksLogger.error({
40+
msg: `Non-retryable error response from webhook`,
41+
webhookUrl,
42+
status: result.status,
43+
response: await result.text(),
44+
});
45+
metrics.totalLivechatWebhooksFailures.inc();
46+
return;
47+
}
48+
3749
metrics.totalLivechatWebhooksFailures.inc();
3850
throw new Error(await result.text());
3951
} catch (err) {
4052
const retryAfter = timeout * 4;
41-
webhooksLogger.error({ msg: `Error response on ${11 - attempts} try ->`, err });
42-
// try 10 times after 20 seconds each
43-
attempts - 1 && webhooksLogger.warn({ msg: `Webhook call failed. Retrying`, newAttemptAfterSeconds: retryAfter / 1000, webhookUrl });
53+
webhooksLogger.debug({ msg: `Error response on ${6 - attempts} try ->`, err, newAttemptAfterSeconds: retryAfter / 1000, webhookUrl });
54+
const remainingAttempts = attempts - 1;
55+
// try 5 times after 20 seconds each
56+
if (!remainingAttempts) {
57+
webhooksLogger.error({ msg: 'Omnichannel webhook call failed. Max attempts reached' });
58+
return;
59+
}
60+
4461
setTimeout(async () => {
45-
await sendRequest(postData, attempts - 1);
62+
await sendRequest(postData, remainingAttempts, cb);
4663
}, retryAfter);
4764
}
4865
}
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
import { expect } from 'chai';
2+
import { describe, it, afterEach } from 'mocha';
3+
import proxyquire from 'proxyquire';
4+
import sinon from 'sinon';
5+
6+
type FetchResponse = {
7+
status: number;
8+
text: () => Promise<string>;
9+
};
10+
11+
const mkResponse = (status: number, text = ''): FetchResponse => ({
12+
status,
13+
text: async () => text,
14+
});
15+
16+
const MODULE_PATH = '../../../../../../app/livechat/server/lib/webhooks';
17+
18+
function buildSubject(options?: {
19+
fetchSequence?: Array<{ status: number; text?: string }>;
20+
fetchOnce?: { status: number; text?: string };
21+
settings?: Partial<Record<'Livechat_http_timeout' | 'Livechat_secret_token' | 'Livechat_webhookUrl', any>>;
22+
}) {
23+
const defaults = {
24+
Livechat_http_timeout: 1000,
25+
Livechat_secret_token: 'super-secret',
26+
Livechat_webhookUrl: 'https://example.test/hook',
27+
};
28+
29+
const settingsValues = { ...defaults, ...(options?.settings ?? {}) };
30+
31+
const settings = {
32+
get: sinon.stub().callsFake((key: string) => settingsValues[key as keyof typeof settingsValues]),
33+
};
34+
35+
const fetchStub = sinon.stub();
36+
if (options?.fetchSequence && options.fetchSequence.length) {
37+
options.fetchSequence.forEach((spec, i) => {
38+
fetchStub.onCall(i).resolves(mkResponse(spec.status, spec.text));
39+
});
40+
} else if (options?.fetchOnce) {
41+
fetchStub.resolves(mkResponse(options.fetchOnce.status, options.fetchOnce.text));
42+
} else {
43+
fetchStub.resolves(mkResponse(200, 'ok'));
44+
}
45+
46+
const logger = {
47+
debug: sinon.spy(),
48+
error: sinon.spy(),
49+
};
50+
51+
const metrics = {
52+
totalLivechatWebhooksSuccess: { inc: sinon.spy() },
53+
totalLivechatWebhooksFailures: { inc: sinon.spy() },
54+
};
55+
56+
const { sendRequest } = proxyquire.noCallThru().load(MODULE_PATH, {
57+
'@rocket.chat/server-fetch': { serverFetch: fetchStub },
58+
'./logger': { webhooksLogger: logger },
59+
'../../../metrics/server': { metrics },
60+
'../../../settings/server': { settings },
61+
});
62+
63+
return {
64+
sendRequest,
65+
stubs: { fetchStub, logger, metrics, settings },
66+
values: settingsValues,
67+
};
68+
}
69+
70+
describe('livechat/server/lib/webhooks sendRequest', () => {
71+
let clock: sinon.SinonFakeTimers;
72+
73+
afterEach(() => {
74+
sinon.restore();
75+
if (clock) {
76+
clock.restore();
77+
}
78+
});
79+
80+
it('sends a POST request with correct options and resolves on 200', async () => {
81+
const postData = { type: 'TestEvent', a: 1 };
82+
const secret = 'my-secret';
83+
const timeout = 2500;
84+
const webhookUrl = 'https://example.com/webhook';
85+
86+
const { sendRequest, stubs, values } = buildSubject({
87+
fetchOnce: { status: 200, text: 'OK' },
88+
settings: {
89+
Livechat_secret_token: secret,
90+
Livechat_http_timeout: timeout,
91+
Livechat_webhookUrl: webhookUrl,
92+
},
93+
});
94+
95+
const cb = sinon.spy(async () => {
96+
/* noop */
97+
});
98+
99+
const result = await sendRequest(postData, 5, cb);
100+
101+
// fetch was called with correct URL and options
102+
expect(stubs.fetchStub.calledOnce).to.be.true;
103+
const [calledUrl, calledOpts] = stubs.fetchStub.getCall(0).args;
104+
expect(calledUrl).to.equal(values.Livechat_webhookUrl);
105+
expect(calledOpts).to.deep.include({
106+
method: 'POST',
107+
body: postData,
108+
timeout: values.Livechat_http_timeout,
109+
});
110+
expect(calledOpts.headers).to.have.property('X-RocketChat-Livechat-Token', secret);
111+
112+
// success metrics and callback
113+
expect(stubs.metrics.totalLivechatWebhooksSuccess.inc.calledOnce).to.be.true;
114+
expect(stubs.metrics.totalLivechatWebhooksFailures.inc.notCalled).to.be.true;
115+
expect(cb.calledOnce).to.be.true;
116+
117+
// result is the fetch response
118+
expect(result).to.be.ok;
119+
expect(result!.status).to.equal(200);
120+
121+
// debug logging invoked at least once
122+
expect(stubs.logger.debug.called).to.be.true;
123+
expect(stubs.logger.error.notCalled).to.be.true;
124+
});
125+
126+
it('omits X-RocketChat-Livechat-Token header when secret token is falsy', async () => {
127+
const { sendRequest, stubs } = buildSubject({
128+
fetchOnce: { status: 200, text: 'OK' },
129+
settings: {
130+
Livechat_secret_token: '',
131+
},
132+
});
133+
134+
await sendRequest({ type: 'NoSecret' });
135+
136+
const [, calledOpts] = stubs.fetchStub.getCall(0).args;
137+
expect(calledOpts.headers).to.not.have.property('X-RocketChat-Livechat-Token');
138+
});
139+
140+
it('logs and does not retry on non-retryable status (e.g., 400)', async () => {
141+
const { sendRequest, stubs } = buildSubject({
142+
fetchOnce: { status: 400, text: 'Bad Request' },
143+
});
144+
145+
const cb = sinon.spy(async () => {
146+
/* noop */
147+
});
148+
149+
const result = await sendRequest({ type: 'NonRetryable' }, 5, cb);
150+
151+
// Does not throw; returns undefined
152+
expect(result).to.be.undefined;
153+
154+
// fetch called only once, no retry
155+
expect(stubs.fetchStub.calledOnce).to.be.true;
156+
157+
// failure metric incremented
158+
expect(stubs.metrics.totalLivechatWebhooksFailures.inc.calledOnce).to.be.true;
159+
expect(stubs.metrics.totalLivechatWebhooksSuccess.inc.notCalled).to.be.true;
160+
161+
// callback not called on failure
162+
expect(cb.notCalled).to.be.true;
163+
164+
// error log with status and response
165+
expect(stubs.logger.error.called).to.be.true;
166+
const logArg = stubs.logger.error.getCall(0).args[0];
167+
expect(logArg).to.include.keys(['msg', 'status', 'response']);
168+
expect(logArg.status).to.equal(400);
169+
expect(logArg.response).to.equal('Bad Request');
170+
});
171+
172+
it('retries once on retryable status (e.g., 500) and succeeds on next attempt', async () => {
173+
clock = sinon.useFakeTimers();
174+
175+
const timeout = 1500;
176+
const retryAfter = timeout * 4;
177+
178+
const { sendRequest, stubs } = buildSubject({
179+
fetchSequence: [
180+
{ status: 500, text: 'Server Error' },
181+
{ status: 200, text: 'OK' },
182+
],
183+
settings: { Livechat_http_timeout: timeout },
184+
});
185+
186+
const cb = sinon.spy(async () => {
187+
/* noop */
188+
});
189+
190+
// Call initial request; it will schedule a retry
191+
await sendRequest({ type: 'RetryableOnce' }, 5, cb);
192+
expect(stubs.fetchStub.callCount).to.equal(1);
193+
expect(stubs.metrics.totalLivechatWebhooksFailures.inc.calledOnce).to.be.true;
194+
195+
// Advance clock to trigger the retry
196+
// use tickAsync to ensure promise microtasks inside the timer are flushed
197+
await clock.tickAsync(retryAfter);
198+
199+
// After retry, we should have a success
200+
expect(stubs.fetchStub.callCount).to.equal(2);
201+
expect(stubs.metrics.totalLivechatWebhooksSuccess.inc.calledOnce).to.be.true;
202+
203+
// callback called once for the successful attempt
204+
expect(cb.calledOnce).to.be.true;
205+
206+
// no terminal error log
207+
const errorMsgs = stubs.logger.error.getCalls().map((c: sinon.SinonSpyCall) => c.args[0]?.msg);
208+
expect(errorMsgs.some((m: string) => typeof m === 'string' && m.includes('Max attempts'))).to.be.false;
209+
});
210+
211+
it('stops after max attempts and logs final error for repeated retryable failures', async () => {
212+
clock = sinon.useFakeTimers();
213+
214+
const timeout = 500;
215+
const retryAfter = timeout * 4;
216+
217+
// Prepare 5 retryable failures (equal to attempts)
218+
const failures = Array.from({ length: 5 }, () => ({ status: 500, text: 'Upstream down' }));
219+
const { sendRequest, stubs } = buildSubject({
220+
fetchSequence: failures,
221+
settings: { Livechat_http_timeout: timeout },
222+
});
223+
224+
const postData = { type: 'AlwaysFail' };
225+
226+
// Fire the initial call
227+
await sendRequest(postData, 5);
228+
229+
// Process the 4 scheduled retries
230+
for (let i = 0; i < 4; i++) {
231+
// eslint-disable-next-line no-await-in-loop
232+
await clock.tickAsync(retryAfter);
233+
}
234+
235+
// fetch was attempted 5 times total
236+
expect(stubs.fetchStub.callCount).to.equal(5);
237+
238+
// failure metric incremented for each failed attempt
239+
expect(stubs.metrics.totalLivechatWebhooksFailures.inc.callCount).to.equal(5);
240+
expect(stubs.metrics.totalLivechatWebhooksSuccess.inc.notCalled).to.be.true;
241+
242+
// terminal error logged when max attempts reached
243+
expect(stubs.logger.error.called).to.be.true;
244+
const msgs = stubs.logger.error.getCalls().map((c: sinon.SinonSpyCall) => c.args[0]?.msg);
245+
expect(msgs.some((m: string) => typeof m === 'string' && m.includes('Max attempts'))).to.be.true;
246+
});
247+
248+
it('passes the Response to the callback on success', async () => {
249+
const responseText = 'Great Success';
250+
const { sendRequest, stubs } = buildSubject({
251+
fetchOnce: { status: 200, text: responseText },
252+
});
253+
254+
const cb = sinon.spy(async (res: FetchResponse) => {
255+
const txt = await res.text();
256+
expect(txt).to.equal(responseText);
257+
});
258+
259+
await sendRequest({ type: 'CbTest' }, 5, cb);
260+
261+
expect(cb.calledOnce).to.be.true;
262+
expect(stubs.metrics.totalLivechatWebhooksSuccess.inc.calledOnce).to.be.true;
263+
});
264+
265+
it('uses default attempts=5 and schedules retry with delay = timeout*4', async () => {
266+
clock = sinon.useFakeTimers();
267+
268+
const timeout = 1234;
269+
const retryAfter = timeout * 4;
270+
271+
const { sendRequest, stubs } = buildSubject({
272+
fetchSequence: [
273+
{ status: 500, text: 'fail' },
274+
{ status: 200, text: 'ok' },
275+
],
276+
settings: { Livechat_http_timeout: timeout },
277+
});
278+
279+
await sendRequest({ type: 'DelayCheck' });
280+
281+
// Before advancing time, only first attempt should have been made
282+
expect(stubs.fetchStub.callCount).to.equal(1);
283+
284+
// Advance just shy of the retryAfter - no retry should happen
285+
await clock.tickAsync(retryAfter - 1);
286+
expect(stubs.fetchStub.callCount).to.equal(1);
287+
288+
// Advance the remaining 1ms - retry should fire
289+
await clock.tickAsync(1);
290+
expect(stubs.fetchStub.callCount).to.equal(2);
291+
});
292+
});

0 commit comments

Comments
 (0)