Skip to content

Commit a9734ca

Browse files
authored
feat(server): Kubernetes health probes (#4464) (#4473)
1 parent 8b1a622 commit a9734ca

12 files changed

Lines changed: 427 additions & 8 deletions

File tree

engine/src/main/java/com/arcadedb/GlobalConfiguration.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,10 @@ Enable diagnostic logging during vector graph build progress (heap/off-heap memo
545545

546546
SERVER_METRICS_LOGGING("arcadedb.serverMetrics.logging", SCOPE.SERVER, "True to enable metrics logging", Boolean.class, false),
547547

548+
// Opt-in gate for the /api/v1/ready probe. The readiness handler only checks that the HA election
// status is DONE; it does not verify log catch-up, so the description must not promise it.
SERVER_READINESS_REQUIRES_HA("arcadedb.server.readinessRequiresHA", SCOPE.SERVER,
    "When true and HA is active, /api/v1/ready also requires the node to have joined the Raft group (leader election completed). It does not verify that the node has replicated all committed log entries. Default false preserves current readiness behavior.",
    Boolean.class, false),
551+
548552
//paths
549553
SERVER_ROOT_PATH("arcadedb.server.rootPath", SCOPE.SERVER,
550554
"Root path in the file system where the server is looking for files. By default is the current directory", String.class,
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com)
17+
* SPDX-License-Identifier: Apache-2.0
18+
*/
19+
package com.arcadedb;
20+
21+
import org.junit.jupiter.api.Test;
22+
23+
import static org.assertj.core.api.Assertions.assertThat;
24+
25+
class GlobalConfigurationReadinessHATest {
26+
@Test
27+
void keyExistsAndDefaultsToFalse() {
28+
assertThat(GlobalConfiguration.SERVER_READINESS_REQUIRES_HA.getKey()).isEqualTo("arcadedb.server.readinessRequiresHA");
29+
assertThat(GlobalConfiguration.SERVER_READINESS_REQUIRES_HA.getValueAsBoolean()).isFalse();
30+
}
31+
32+
@Test
33+
void lookupByKeyResolvesConstant() {
34+
assertThat(GlobalConfiguration.findByKey("arcadedb.server.readinessRequiresHA"))
35+
.isSameAs(GlobalConfiguration.SERVER_READINESS_REQUIRES_HA);
36+
}
37+
}

k8s/README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,35 @@ helm repo add arcadedb https://helm.arcadedb.com/
1111
helm repo update
1212
helm install my-arcadedb arcadedb/arcadedb
1313
```
14+
15+
## Health probes
16+
17+
ArcadeDB exposes two HTTP probe endpoints on the API port (default `2480`):
18+
19+
- `GET /api/v1/health` - liveness. Returns `204` when the process and HTTP layer are up.
20+
Performs no database I/O and requires no authentication. Use this for `livenessProbe` so
21+
Kubernetes never restarts a node that is merely warming up.
22+
- `GET /api/v1/ready` - readiness. Returns `204` when the server is `ONLINE`. When
23+
`arcadedb.server.readinessRequiresHA=true` and HA is active, it returns `503` until the node
24+
has joined the Raft group (leader election completed; full log catch-up is not verified). Default (`false`) preserves single-node readiness
25+
behavior.
26+
27+
```yaml
28+
livenessProbe:
29+
httpGet:
30+
path: /api/v1/health
31+
port: 2480
32+
initialDelaySeconds: 10
33+
periodSeconds: 10
34+
failureThreshold: 3
35+
readinessProbe:
36+
httpGet:
37+
path: /api/v1/ready
38+
port: 2480
39+
initialDelaySeconds: 5
40+
periodSeconds: 5
41+
failureThreshold: 3
42+
```
43+
44+
To make readiness HA-aware in a clustered deployment, set the environment variable
45+
`ARCADEDB_SERVER_READINESSREQUIRESHA=true` (or pass `-Darcadedb.server.readinessRequiresHA=true`).

load-tests/src/test/java/com/arcadedb/test/support/ContainersTestTemplate.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ protected GenericContainer<?> createArcadeContainer(String name, Network network
389389
""", name))
390390
.withEnv("ARCADEDB_OPTS_MEMORY", "-Xms2G -Xmx2G")
391391
.withCreateContainerCmdModifier(cmd -> cmd.getHostConfig().withMemory(3L * 1024 * 1024 * 1024))
392-
.waitingFor(Wait.forHttp("/api/v1/ready").forPort(2480).forStatusCode(204));
392+
.waitingFor(Wait.forHttp("/api/v1/health").forPort(2480).forStatusCode(204));
393393
containers.add(container);
394394
return container;
395395
}
@@ -428,14 +428,14 @@ protected GenericContainer<?> createArcadeContainer(
428428
-Darcadedb.backup.enabled=false
429429
-Darcadedb.typeDefaultBuckets=10
430430
-Darcadedb.ha.enabled=true
431-
-Darcadedb.ha.implementation=raft
432431
-Darcadedb.ha.quorum=%s
432+
-Darcadedb.server.readinessRequiresHA=true
433433
-Darcadedb.ha.raft.port=2434
434434
-Darcadedb.ha.serverList=%s
435435
""", name, quorum, serverList))
436436
.withEnv("ARCADEDB_OPTS_MEMORY", "-Xms2G -Xmx2G")
437437
.withCreateContainerCmdModifier(cmd -> cmd.getHostConfig().withMemory(3L * 1024 * 1024 * 1024))
438-
.waitingFor(Wait.forHttp("/api/v1/ready").forPort(2480).forStatusCode(204));
438+
.waitingFor(Wait.forHttp("/api/v1/health").forPort(2480).forStatusCode(204));
439439
containers.add(container);
440440
return container;
441441
}
@@ -472,14 +472,14 @@ protected GenericContainer<?> createPersistentArcadeContainer(
472472
-Darcadedb.backup.enabled=false
473473
-Darcadedb.typeDefaultBuckets=10
474474
-Darcadedb.ha.enabled=true
475-
-Darcadedb.ha.implementation=raft
475+
-Darcadedb.server.readinessRequiresHA=true
476476
-Darcadedb.ha.quorum=%s
477477
-Darcadedb.ha.raft.port=2434
478478
-Darcadedb.ha.serverList=%s
479479
""", name, quorum, serverList))
480480
.withEnv("ARCADEDB_OPTS_MEMORY", "-Xms2G -Xmx2G")
481481
.withCreateContainerCmdModifier(cmd -> cmd.getHostConfig().withMemory(3L * 1024 * 1024 * 1024))
482-
.waitingFor(Wait.forHttp("/api/v1/ready").forPort(2480).forStatusCode(204));
482+
.waitingFor(Wait.forHttp("/api/v1/health").forPort(2480).forStatusCode(204));
483483
containers.add(container);
484484
return container;
485485
}

server/src/main/java/com/arcadedb/server/http/HttpServer.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.arcadedb.server.http.handler.GetUsersHandler;
3636
import com.arcadedb.server.http.handler.GetDynamicContentHandler;
3737
import com.arcadedb.server.http.handler.GetExistsDatabaseHandler;
38+
import com.arcadedb.server.http.handler.GetHealthHandler;
3839
import com.arcadedb.server.http.handler.GetOpenApiHandler;
3940
import com.arcadedb.server.http.handler.GetQueryHandler;
4041
import com.arcadedb.server.http.handler.GetReadyHandler;
@@ -230,6 +231,7 @@ private PathHandler setupRoutes() {
230231
.get("/server", new GetServerHandler(this))
231232
.post("/server", new PostServerCommandHandler(this))
232233
.get("/ready", new GetReadyHandler(this))
234+
.get("/health", new GetHealthHandler(this))
233235
.get("/openapi.json", new GetOpenApiHandler(this))
234236
.get("/docs", new GetApiDocsHandler(this))
235237
.get("/server/api-tokens", new GetApiTokensHandler(this))
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com)
17+
* SPDX-License-Identifier: Apache-2.0
18+
*/
19+
package com.arcadedb.server.http.handler;
20+
21+
import com.arcadedb.serializer.json.JSONObject;
22+
import com.arcadedb.server.http.HttpServer;
23+
import com.arcadedb.server.security.ServerSecurityUser;
24+
import io.micrometer.core.instrument.Metrics;
25+
import io.undertow.server.HttpServerExchange;
26+
27+
/**
28+
* Kubernetes liveness probe. Reports whether the server process and HTTP layer are up.
29+
* Performs no database I/O and requires no authentication so an orchestrator can poll it
30+
* cheaply. Distinct from readiness: a node that is merely warming up is still live and must
31+
* not be killed by the orchestrator.
32+
*/
33+
public class GetHealthHandler extends AbstractServerHttpHandler {
34+
public GetHealthHandler(final HttpServer httpServer) {
35+
super(httpServer);
36+
}
37+
38+
@Override
39+
public ExecutionResponse execute(final HttpServerExchange exchange, final ServerSecurityUser user, final JSONObject payload) {
40+
Metrics.counter("http.health").increment();
41+
42+
// Liveness only: reaching this handler proves the HTTP layer is up, so the process is live.
43+
// It deliberately does not consult server status, so a node still warming up is not killed.
44+
return new ExecutionResponse(204, "");
45+
}
46+
47+
@Override
48+
public boolean isRequireAuthentication() {
49+
return false;
50+
}
51+
}

server/src/main/java/com/arcadedb/server/http/handler/GetReadyHandler.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
*/
1919
package com.arcadedb.server.http.handler;
2020

21+
import com.arcadedb.GlobalConfiguration;
2122
import com.arcadedb.serializer.json.JSONObject;
2223
import com.arcadedb.server.ArcadeDBServer;
24+
import com.arcadedb.server.HAServerPlugin;
2325
import com.arcadedb.server.http.HttpServer;
2426
import com.arcadedb.server.security.ServerSecurityUser;
2527
import io.micrometer.core.instrument.Metrics;
@@ -34,9 +36,20 @@ public GetReadyHandler(final HttpServer httpServer) {
3436
public ExecutionResponse execute(final HttpServerExchange exchange, final ServerSecurityUser user, final JSONObject payload) {
3537
Metrics.counter("http.ready").increment();
3638

37-
if (httpServer.getServer().getStatus() == ArcadeDBServer.STATUS.ONLINE)
38-
return new ExecutionResponse(204, "");
39-
return new ExecutionResponse(503, "Server not started yet");
39+
final ArcadeDBServer server = httpServer.getServer();
40+
if (server.getStatus() != ArcadeDBServer.STATUS.ONLINE)
41+
return new ExecutionResponse(503, "Server not started yet");
42+
43+
if (server.getConfiguration().getValueAsBoolean(GlobalConfiguration.SERVER_READINESS_REQUIRES_HA)
44+
&& server.getConfiguration().getValueAsBoolean(GlobalConfiguration.HA_ENABLED)) {
45+
final HAServerPlugin ha = server.getHA();
46+
// ELECTION_STATUS.DONE means a leader is known; it does not guarantee this follower has
47+
// replicated all committed log entries, so a slow follower may report ready before catch-up.
48+
if (ha == null || ha.getElectionStatus() != HAServerPlugin.ELECTION_STATUS.DONE)
49+
return new ExecutionResponse(503, "Node has not yet joined the Raft group");
50+
}
51+
52+
return new ExecutionResponse(204, "");
4053
}
4154

4255
@Override

server/src/main/java/com/arcadedb/server/http/handler/OpenApiSpecGenerator.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ private Paths createPaths() {
141141
// Server endpoints
142142
paths.addPathItem("/api/v1/server", createServerPath());
143143
paths.addPathItem("/api/v1/ready", createReadyPath());
144+
paths.addPathItem("/api/v1/health", createHealthPath());
144145
paths.addPathItem("/api/v1/databases", createDatabasesPath());
145146

146147
// Database endpoints
@@ -216,6 +217,32 @@ private PathItem createReadyPath() {
216217
return pathItem;
217218
}
218219

220+
private PathItem createHealthPath() {
221+
final PathItem pathItem = new PathItem();
222+
223+
final Operation getOp = new Operation();
224+
getOp.setSummary("Check server liveness");
225+
getOp.setDescription("Liveness probe: returns 204 when the server process and HTTP layer are up. Performs no database I/O and requires no authentication.");
226+
getOp.setOperationId("checkHealth");
227+
getOp.addTagsItem("Health");
228+
getOp.setResponses(createHealthResponses());
229+
pathItem.setGet(getOp);
230+
231+
return pathItem;
232+
}
233+
234+
private ApiResponses createHealthResponses() {
235+
final ApiResponses responses = new ApiResponses();
236+
237+
// Liveness only ever responds with a 2xx when reachable; it never returns 503 (unlike readiness).
238+
final ApiResponse liveResponse = new ApiResponse();
239+
liveResponse.setDescription("Server process and HTTP layer are up");
240+
responses.addApiResponse("200", liveResponse);
241+
responses.addApiResponse("204", liveResponse);
242+
243+
return responses;
244+
}
245+
219246
private PathItem createDatabasesPath() {
220247
final PathItem pathItem = new PathItem();
221248

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com)
17+
* SPDX-License-Identifier: Apache-2.0
18+
*/
19+
package com.arcadedb.server.http;
20+
21+
import com.arcadedb.server.http.handler.ExecutionResponse;
22+
import com.arcadedb.server.http.handler.GetHealthHandler;
23+
import com.arcadedb.server.security.ServerSecurityUser;
24+
import org.junit.jupiter.api.Test;
25+
26+
import static org.assertj.core.api.Assertions.assertThat;
27+
import static org.mockito.Mockito.mock;
28+
29+
class GetHealthHandlerTest {
30+
31+
@Test
32+
void alwaysReturns204() throws Exception {
33+
// Liveness only: if the handler runs at all, the HTTP layer is up. Server status is irrelevant.
34+
final GetHealthHandler handler = new GetHealthHandler(mock(HttpServer.class));
35+
36+
final ExecutionResponse response = handler.execute(null, (ServerSecurityUser) null, null);
37+
assertThat(response.getCode()).isEqualTo(204);
38+
}
39+
40+
@Test
41+
void doesNotRequireAuthentication() {
42+
final GetHealthHandler handler = new GetHealthHandler(mock(HttpServer.class));
43+
44+
assertThat(handler.isRequireAuthentication()).isFalse();
45+
}
46+
}

0 commit comments

Comments
 (0)