Skip to content

Commit 00c52d9

Browse files
committed
Merge branch 'bugfix/CLDSRV-836-deep-healthcheck' into q/9.2
2 parents f204ad4 + 6dd43a5 commit 00c52d9

File tree

2 files changed

+254
-9
lines changed

2 files changed

+254
-9
lines changed

lib/utilities/healthcheckHandler.js

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,20 +56,30 @@ function clientCheck(flightCheckOnStartUp, log, cb) {
5656
}
5757
});
5858
async.parallel(clientTasks, (err, results) => {
59-
let fail = false;
6059
// obj will be an object of the healthcheck results of
6160
// every backends. No errors were returned directly to
6261
// async.parallel in order to complete the check, so a
6362
// manual check makes S3 return 500 error if any backend failed
6463
// other than aws_s3 or azure
65-
const obj = results.reduce((obj, item) => Object.assign(obj, item), {});
66-
// fail only if *all* backend fail, so that we can still serve the ones which are working
67-
fail = Object.keys(obj).every(k =>
68-
// if there is an error from an external backend,
69-
// only return a 500 if it is on startup
70-
// (flightCheckOnStartUp set to true)
71-
obj[k].error && (flightCheckOnStartUp || !obj[k].external)
72-
);
64+
// Check if any client has ALL its backends/locations failing.
65+
// Each result in the array represents one client (data, metadata, vault, kms)
66+
// with its backend locations as keys.
67+
// If ALL backend/locations of ANY client have errors, the overall check fails.
68+
const { obj, fail } = results.reduce((acc, clientResult) => {
69+
Object.assign(acc.obj, clientResult);
70+
71+
// Check if ALL backends/locations of this client have errors
72+
const keys = Object.keys(clientResult);
73+
// eslint-disable-next-line no-param-reassign
74+
acc.fail ||= keys.length > 0 && keys.every(k =>
75+
// if there is an error from an external backend,
76+
// only return a 500 if it is on startup
77+
// (flightCheckOnStartUp set to true)
78+
clientResult[k].error && (flightCheckOnStartUp || !clientResult[k].external)
79+
);
80+
81+
return acc;
82+
}, { obj: {}, fail: false });
7383
if (fail) {
7484
return cb(errors.InternalError, obj);
7585
}
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
const assert = require('assert');
2+
const sinon = require('sinon');
3+
const { errors } = require('arsenal');
4+
5+
const { clientCheck } = require('../../../lib/utilities/healthcheckHandler');
6+
const { DummyRequestLogger } = require('../helpers');
7+
const { data } = require('../../../lib/data/wrapper');
8+
const metadata = require('../../../lib/metadata/wrapper');
9+
const vault = require('../../../lib/auth/vault');
10+
const kms = require('../../../lib/kms/wrapper');
11+
12+
const log = new DummyRequestLogger();
13+
14+
describe('clientCheck - failure detection logic', () => {
15+
let dataStub;
16+
let metadataStub;
17+
let vaultStub;
18+
let kmsStub;
19+
20+
beforeEach(() => {
21+
// Create stubs for all client checkHealth methods
22+
dataStub = sinon.stub(data, 'checkHealth');
23+
metadataStub = sinon.stub(metadata, 'checkHealth');
24+
vaultStub = sinon.stub(vault, 'checkHealth');
25+
kmsStub = sinon.stub(kms, 'checkHealth');
26+
});
27+
28+
afterEach(() => {
29+
sinon.restore();
30+
});
31+
32+
it('should succeed when all backends are healthy', done => {
33+
dataStub.callsFake((log, cb) => cb(null, {
34+
'sproxyd-loc1': { code: 200, message: 'OK' },
35+
'sproxyd-loc2': { code: 200, message: 'OK' },
36+
}));
37+
metadataStub.callsFake((log, cb) => cb(null, {
38+
metadata: { code: 200, message: 'OK' },
39+
}));
40+
vaultStub.callsFake((log, cb) => cb(null, {
41+
vault: { code: 200, message: 'OK' },
42+
}));
43+
kmsStub.callsFake((log, cb) => cb(null, {
44+
kms: { code: 200, message: 'OK' },
45+
}));
46+
47+
clientCheck(false, log, (err, result) => {
48+
assert.ifError(err);
49+
assert.deepStrictEqual(result, {
50+
'sproxyd-loc1': { code: 200, message: 'OK' },
51+
'sproxyd-loc2': { code: 200, message: 'OK' },
52+
metadata: { code: 200, message: 'OK' },
53+
vault: { code: 200, message: 'OK' },
54+
kms: { code: 200, message: 'OK' },
55+
});
56+
done();
57+
});
58+
});
59+
60+
it('should fail when ALL backends of data client fail while metadata is healthy', done => {
61+
dataStub.callsFake((log, cb) => cb(null, {
62+
'sproxyd-loc1': { error: errors.InternalError, code: 500 },
63+
'sproxyd-loc2': { error: errors.InternalError, code: 500 },
64+
}));
65+
metadataStub.callsFake((log, cb) => cb(null, {
66+
metadata: { code: 200, message: 'OK' },
67+
}));
68+
vaultStub.callsFake((log, cb) => cb(null, {
69+
vault: { code: 200, message: 'OK' },
70+
}));
71+
kmsStub.callsFake((log, cb) => cb(null, {
72+
kms: { code: 200, message: 'OK' },
73+
}));
74+
75+
clientCheck(false, log, (err, result) => {
76+
assert(err);
77+
assert.strictEqual(err.InternalError, true);
78+
assert.deepStrictEqual(result, {
79+
'sproxyd-loc1': { error: errors.InternalError, code: 500 },
80+
'sproxyd-loc2': { error: errors.InternalError, code: 500 },
81+
metadata: { code: 200, message: 'OK' },
82+
vault: { code: 200, message: 'OK' },
83+
kms: { code: 200, message: 'OK' },
84+
});
85+
done();
86+
});
87+
});
88+
89+
it('should succeed when ONE data location fails but another is healthy', done => {
90+
dataStub.callsFake((log, cb) => cb(null, {
91+
'sproxyd-loc1': { error: errors.InternalError, code: 500 },
92+
'sproxyd-loc2': { code: 200, message: 'OK' },
93+
}));
94+
metadataStub.callsFake((log, cb) => cb(null, {
95+
metadata: { code: 200, message: 'OK' },
96+
}));
97+
vaultStub.callsFake((log, cb) => cb(null, {
98+
vault: { code: 200, message: 'OK' },
99+
}));
100+
kmsStub.callsFake((log, cb) => cb(null, {
101+
kms: { code: 200, message: 'OK' },
102+
}));
103+
104+
clientCheck(false, log, (err, result) => {
105+
assert.ifError(err);
106+
assert.deepStrictEqual(result, {
107+
'sproxyd-loc1': { error: errors.InternalError, code: 500 },
108+
'sproxyd-loc2': { code: 200, message: 'OK' },
109+
metadata: { code: 200, message: 'OK' },
110+
vault: { code: 200, message: 'OK' },
111+
kms: { code: 200, message: 'OK' },
112+
});
113+
done();
114+
});
115+
});
116+
117+
it('should fail when ALL backends of multiple clients fail', done => {
118+
dataStub.callsFake((log, cb) => cb(null, {
119+
'sproxyd-loc1': { error: errors.InternalError, code: 500 },
120+
'sproxyd-loc2': { error: errors.InternalError, code: 500 },
121+
}));
122+
metadataStub.callsFake((log, cb) => cb(null, {
123+
metadata: { error: errors.InternalError, code: 500 },
124+
}));
125+
vaultStub.callsFake((log, cb) => cb(null, {
126+
vault: { code: 200, message: 'OK' },
127+
}));
128+
kmsStub.callsFake((log, cb) => cb(null, {
129+
kms: { code: 200, message: 'OK' },
130+
}));
131+
132+
clientCheck(false, log, (err, result) => {
133+
assert(err);
134+
assert.strictEqual(err.InternalError, true);
135+
assert.deepStrictEqual(result, {
136+
'sproxyd-loc1': { error: errors.InternalError, code: 500 },
137+
'sproxyd-loc2': { error: errors.InternalError, code: 500 },
138+
metadata: { error: errors.InternalError, code: 500 },
139+
vault: { code: 200, message: 'OK' },
140+
kms: { code: 200, message: 'OK' },
141+
});
142+
done();
143+
});
144+
});
145+
146+
it('should succeed when client returns empty result', done => {
147+
dataStub.callsFake((log, cb) => cb(null, {}));
148+
metadataStub.callsFake((log, cb) => cb(null, {
149+
metadata: { code: 200, message: 'OK' },
150+
}));
151+
vaultStub.callsFake((log, cb) => cb(null, {
152+
vault: { code: 200, message: 'OK' },
153+
}));
154+
kmsStub.callsFake((log, cb) => cb(null, {
155+
kms: { code: 200, message: 'OK' },
156+
}));
157+
158+
clientCheck(false, log, (err, result) => {
159+
assert.ifError(err);
160+
assert.deepStrictEqual(result, {
161+
metadata: { code: 200, message: 'OK' },
162+
vault: { code: 200, message: 'OK' },
163+
kms: { code: 200, message: 'OK' },
164+
});
165+
done();
166+
});
167+
});
168+
169+
describe('external backend error handling', () => {
170+
it('should NOT fail on external backend errors during normal operation ' +
171+
'(flightCheckOnStartUp=false)', done => {
172+
dataStub.callsFake((log, cb) => cb(null, {
173+
's3-backend': { error: errors.InternalError, code: 500, external: true },
174+
}));
175+
metadataStub.callsFake((log, cb) => cb(null, {
176+
metadata: { code: 200, message: 'OK' },
177+
}));
178+
vaultStub.callsFake((log, cb) => cb(null, {}));
179+
kmsStub.callsFake((log, cb) => cb(null, {}));
180+
181+
clientCheck(false, log, (err, result) => {
182+
assert.ifError(err);
183+
assert.deepStrictEqual(result, {
184+
's3-backend': { error: errors.InternalError, code: 500, external: true },
185+
metadata: { code: 200, message: 'OK' },
186+
});
187+
done();
188+
});
189+
});
190+
191+
it('should fail on external backend errors during startup (flightCheckOnStartUp=true)', done => {
192+
dataStub.callsFake((log, cb) => cb(null, {
193+
's3-backend': { error: errors.InternalError, code: 500, external: true },
194+
}));
195+
metadataStub.callsFake((log, cb) => cb(null, {
196+
metadata: { code: 200, message: 'OK' },
197+
}));
198+
vaultStub.callsFake((log, cb) => cb(null, {}));
199+
kmsStub.callsFake((log, cb) => cb(null, {}));
200+
201+
clientCheck(true, log, (err, result) => {
202+
assert(err);
203+
assert.strictEqual(err.InternalError, true);
204+
assert.deepStrictEqual(result, {
205+
's3-backend': { error: errors.InternalError, code: 500, external: true },
206+
metadata: { code: 200, message: 'OK' },
207+
});
208+
done();
209+
});
210+
});
211+
212+
it('should succeed when external backend fails but internal backend is healthy ' +
213+
'(flightCheckOnStartUp=false)', done => {
214+
dataStub.callsFake((log, cb) => cb(null, {
215+
'sproxyd-loc1': { code: 200, message: 'OK' },
216+
's3-backend': { error: errors.InternalError, code: 500, external: true },
217+
}));
218+
metadataStub.callsFake((log, cb) => cb(null, {
219+
metadata: { code: 200, message: 'OK' },
220+
}));
221+
vaultStub.callsFake((log, cb) => cb(null, {}));
222+
kmsStub.callsFake((log, cb) => cb(null, {}));
223+
224+
clientCheck(false, log, (err, result) => {
225+
assert.ifError(err);
226+
assert.deepStrictEqual(result, {
227+
'sproxyd-loc1': { code: 200, message: 'OK' },
228+
's3-backend': { error: errors.InternalError, code: 500, external: true },
229+
metadata: { code: 200, message: 'OK' },
230+
});
231+
done();
232+
});
233+
});
234+
});
235+
});

0 commit comments

Comments
 (0)