Skip to content

Commit 04e2a23

Browse files
committed
fix: proactor_threads is respected in k8s
- Honor --proactor_threads flag over cgroup-derived CPU limit - Log when flag overrides cgroup value (helps debugging in production) - Add CI job to verify behavior under real Docker cgroup limits
1 parent 5db9666 commit 04e2a23

3 files changed

Lines changed: 128 additions & 0 deletions

File tree

.github/workflows/ci.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,59 @@ jobs:
187187
name: regression_logs
188188
path: /tmp/failed/*
189189

190+
- name: Upload dragonfly binary for cgroup test
191+
if: matrix.container == 'ubuntu-dev:24' && matrix.build-type == 'Release' && matrix.sanitizers == 'NoSanitizers' && matrix.compiler.cxx == 'g++'
192+
uses: actions/upload-artifact@v7
193+
with:
194+
name: dragonfly-binary
195+
path: ${{ github.workspace }}/build/dragonfly
196+
retention-days: 1
197+
198+
cgroup-thread-detection:
199+
runs-on: ubuntu-latest
200+
needs: [build]
201+
steps:
202+
- uses: actions/checkout@v6
203+
204+
- uses: actions/download-artifact@v7
205+
with:
206+
name: dragonfly-binary
207+
path: build
208+
209+
- name: Test cgroup CPU auto-detection
210+
run: |
211+
chmod +x build/dragonfly
212+
213+
# Scenario 1: --cpus=2, no flag -> should auto-detect 2 threads
214+
CID=$(docker run --rm -d --cpus=2 --security-opt seccomp=unconfined \
215+
-v "$PWD/build/dragonfly:/dragonfly" -p 16380:6379 ubuntu:24.04 \
216+
/dragonfly --port 6379 --maxmemory 1G --dbfilename "" --noversion_check)
217+
218+
for i in $(seq 1 20); do
219+
sleep 1
220+
redis-cli -p 16380 PING 2>/dev/null | grep -q PONG && break
221+
done
222+
223+
THREADS=$(redis-cli -p 16380 INFO server | grep -oP 'thread_count:\K\d+')
224+
docker rm -f "$CID"
225+
echo "Scenario 1: expected 2, got $THREADS"
226+
[ "$THREADS" -eq 2 ]
227+
228+
# Scenario 2: --cpus=2 + --proactor_threads=4 -> flag must win
229+
CID=$(docker run --rm -d --cpus=2 --security-opt seccomp=unconfined \
230+
-v "$PWD/build/dragonfly:/dragonfly" -p 16381:6379 ubuntu:24.04 \
231+
/dragonfly --port 6379 --proactor_threads 4 --maxmemory 1G --dbfilename "" --noversion_check)
232+
233+
for i in $(seq 1 20); do
234+
sleep 1
235+
redis-cli -p 16381 PING 2>/dev/null | grep -q PONG && break
236+
done
237+
238+
THREADS=$(redis-cli -p 16381 INFO server | grep -oP 'thread_count:\K\d+')
239+
docker rm -f "$CID"
240+
echo "Scenario 2: expected 4, got $THREADS"
241+
[ "$THREADS" -eq 4 ]
242+
190243
lint-test-chart:
191244
runs-on: ubuntu-latest
192245
needs: [build]

src/server/dfly_main.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ ABSL_DECLARE_FLAG(uint32_t, memcached_port);
8282
ABSL_DECLARE_FLAG(uint16_t, admin_port);
8383
ABSL_DECLARE_FLAG(std::string, admin_bind);
8484
ABSL_DECLARE_FLAG(strings::MemoryBytesFlag, maxmemory);
85+
ABSL_DECLARE_FLAG(uint32_t, proactor_threads);
8586

8687
ABSL_FLAG(string, bind, "",
8788
"Bind address. If empty - binds on all interfaces. "
@@ -1080,6 +1081,14 @@ Usage: dragonfly [FLAGS]
10801081

10811082
#ifdef __linux__
10821083
UpdateResourceLimitsIfInsideContainer(&mem_info, &max_available_threads);
1084+
// If --proactor_threads (or DFLY_proactor_threads env var) was explicitly set by the user,
1085+
// honor it over the cgroup-derived CPU limit. The flag defaults to 0, so any non-zero value
1086+
// means the user explicitly requested a specific thread count.
1087+
if (absl::GetFlag(FLAGS_proactor_threads) > 0) {
1088+
LOG(INFO) << "Using proactor_threads=" << absl::GetFlag(FLAGS_proactor_threads)
1089+
<< " (overriding cgroup-derived " << max_available_threads << ")";
1090+
max_available_threads = 0; // causes ProactorPool to use FLAGS_proactor_threads
1091+
}
10831092
#endif
10841093

10851094
if (mem_info.swap_total != 0)
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""
2+
Tests that --proactor_threads is respected even inside containers with CPU limits.
3+
4+
Background: UpdateResourceLimitsIfInsideContainer() reads the cgroup CPU quota and
5+
passes it as pool_size to ProactorPool. When pool_size != 0, helio's ProactorPool
6+
ignores FLAGS_proactor_threads entirely. The fix resets pool_size to 0 when
7+
--proactor_threads (or DFLY_proactor_threads) is explicitly set, so the flag wins.
8+
"""
9+
10+
import pytest
11+
from .instance import DflyInstanceFactory
12+
13+
14+
@pytest.mark.asyncio
15+
async def test_proactor_threads_flag_is_respected(df_factory: DflyInstanceFactory):
16+
"""Starting with --proactor_threads=N must result in exactly N threads."""
17+
server = df_factory.create(proactor_threads=3)
18+
server.start()
19+
client = server.client()
20+
try:
21+
info = await client.info("server")
22+
assert int(info["thread_count"]) == 3
23+
finally:
24+
await client.aclose()
25+
server.stop()
26+
27+
28+
@pytest.mark.asyncio
29+
async def test_proactor_threads_env_var_is_respected(df_factory: DflyInstanceFactory, monkeypatch):
30+
"""DFLY_proactor_threads env var must behave identically to the CLI flag.
31+
32+
The subprocess inherits the parent environment, so setting the variable here
33+
is equivalent to setting it in a Kubernetes pod's env: section.
34+
"""
35+
monkeypatch.setenv("DFLY_proactor_threads", "2")
36+
# No proactor_threads kwarg — only the env var should drive the thread count.
37+
server = df_factory.create()
38+
server.start()
39+
client = server.client()
40+
try:
41+
info = await client.info("server")
42+
assert int(info["thread_count"]) == 2
43+
finally:
44+
await client.aclose()
45+
server.stop()
46+
47+
48+
@pytest.mark.asyncio
49+
async def test_proactor_threads_flag_overrides_env_var(
50+
df_factory: DflyInstanceFactory, monkeypatch
51+
):
52+
"""CLI flag takes priority over DFLY_proactor_threads env var.
53+
54+
ParseFlagsFromEnv() skips env vars when the flag was already set on the
55+
command line (WasPresentOnCommandLine check), so the CLI value must win.
56+
"""
57+
monkeypatch.setenv("DFLY_proactor_threads", "2")
58+
server = df_factory.create(proactor_threads=4)
59+
server.start()
60+
client = server.client()
61+
try:
62+
info = await client.info("server")
63+
assert int(info["thread_count"]) == 4
64+
finally:
65+
await client.aclose()
66+
server.stop()

0 commit comments

Comments
 (0)