Skip to content

Commit 567f353

Browse files
authored
fix(proactor_threads): fix that proactor_threads is not respected in k8s env (#7030)
fix: proactor_threads is respected in k8s
1 parent 0277e67 commit 567f353

3 files changed

Lines changed: 151 additions & 0 deletions

File tree

.github/workflows/ci.yml

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,82 @@ jobs:
187187
name: regression_logs
188188
path: /tmp/failed/*
189189

190+
- name: Upload dragonfly binary for cgroup test
191+
if: matrix.container == 'ubuntu-dev:24' && matrix.build-type == 'Release' && matrix.sanitizers == 'NoSanitizers' && matrix.compiler.cxx == 'g++'
192+
uses: actions/upload-artifact@v7
193+
with:
194+
name: dragonfly-binary
195+
path: ${{ github.workspace }}/build/dragonfly
196+
retention-days: 1
197+
198+
cgroup-thread-detection:
199+
runs-on: ubuntu-latest
200+
needs: [build]
201+
steps:
202+
- uses: actions/checkout@v6
203+
204+
- uses: actions/download-artifact@v7
205+
with:
206+
name: dragonfly-binary
207+
path: build
208+
209+
- name: Install redis-tools
210+
run: sudo apt-get update && sudo apt-get install -y redis-tools
211+
212+
- name: Prepare cgroup test image
213+
run: |
214+
chmod +x build/dragonfly
215+
# Verify the binary's runtime dependencies and install them in a local image.
216+
# The binary is built in ubuntu-dev:24 and links dynamically against Boost,
217+
# which is not present in the minimal ubuntu:24.04 image.
218+
echo "Runtime deps of dragonfly binary:"
219+
ldd build/dragonfly
220+
docker build -t df-cgroup-test - <<'EOF'
221+
FROM ubuntu:24.04
222+
RUN apt-get update -qq && \
223+
apt-get install -y --no-install-recommends libboost-context1.83.0 libssl3 && \
224+
rm -rf /var/lib/apt/lists/*
225+
EOF
226+
227+
- name: Test cgroup CPU auto-detection
228+
run: |
229+
wait_for_server() {
230+
local port=$1
231+
local cid=$2
232+
for i in $(seq 1 30); do
233+
sleep 1
234+
redis-cli -p "$port" PING 2>/dev/null | grep -q PONG && return 0
235+
done
236+
echo "ERROR: dragonfly on port $port did not start within 30s"
237+
docker logs "$cid" 2>&1 || true
238+
docker rm -f "$cid" 2>/dev/null || true
239+
return 1
240+
}
241+
242+
# Scenario 1: --cpus=2, no flag -> should auto-detect 2 threads
243+
CID=$(docker run --rm -d --cpus=2 --security-opt seccomp=unconfined \
244+
-v "$PWD/build/dragonfly:/dragonfly" -p 16380:6379 df-cgroup-test \
245+
/dragonfly --port 6379 --maxmemory 1G --dbfilename "" --noversion_check)
246+
247+
wait_for_server 16380 "$CID"
248+
249+
THREADS=$(redis-cli -p 16380 INFO server | grep -oP 'thread_count:\K\d+')
250+
docker rm -f "$CID" 2>/dev/null || true
251+
echo "Scenario 1: expected 2, got $THREADS"
252+
[ "$THREADS" -eq 2 ]
253+
254+
# Scenario 2: --cpus=2 + --proactor_threads=4 -> flag must win
255+
CID=$(docker run --rm -d --cpus=2 --security-opt seccomp=unconfined \
256+
-v "$PWD/build/dragonfly:/dragonfly" -p 16381:6379 df-cgroup-test \
257+
/dragonfly --port 6379 --proactor_threads 4 --maxmemory 1G --dbfilename "" --noversion_check)
258+
259+
wait_for_server 16381 "$CID"
260+
261+
THREADS=$(redis-cli -p 16381 INFO server | grep -oP 'thread_count:\K\d+')
262+
docker rm -f "$CID" 2>/dev/null || true
263+
echo "Scenario 2: expected 4, got $THREADS"
264+
[ "$THREADS" -eq 4 ]
265+
190266
lint-test-chart:
191267
runs-on: ubuntu-latest
192268
needs: [build]

src/server/dfly_main.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ ABSL_DECLARE_FLAG(uint32_t, memcached_port);
8282
ABSL_DECLARE_FLAG(uint16_t, admin_port);
8383
ABSL_DECLARE_FLAG(std::string, admin_bind);
8484
ABSL_DECLARE_FLAG(strings::MemoryBytesFlag, maxmemory);
85+
ABSL_DECLARE_FLAG(uint32_t, proactor_threads);
8586

8687
ABSL_FLAG(string, bind, "",
8788
"Bind address. If empty - binds on all interfaces. "
@@ -1080,6 +1081,14 @@ Usage: dragonfly [FLAGS]
10801081

10811082
#ifdef __linux__
10821083
UpdateResourceLimitsIfInsideContainer(&mem_info, &max_available_threads);
1084+
// If --proactor_threads (or DFLY_proactor_threads env var) was explicitly set by the user,
1085+
// honor it over the cgroup-derived CPU limit. The flag defaults to 0, so any non-zero value
1086+
// means the user explicitly requested a specific thread count.
1087+
if (absl::GetFlag(FLAGS_proactor_threads) > 0) {
1088+
LOG(INFO) << "Using proactor_threads=" << absl::GetFlag(FLAGS_proactor_threads)
1089+
<< " (overriding cgroup-derived " << max_available_threads << ")";
1090+
max_available_threads = 0; // causes ProactorPool to use FLAGS_proactor_threads
1091+
}
10831092
#endif
10841093

10851094
if (mem_info.swap_total != 0)
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""
2+
Tests that --proactor_threads is respected even inside containers with CPU limits.
3+
4+
Background: UpdateResourceLimitsIfInsideContainer() reads the cgroup CPU quota and
5+
passes it as pool_size to ProactorPool. When pool_size != 0, helio's ProactorPool
6+
ignores FLAGS_proactor_threads entirely. The fix resets pool_size to 0 when
7+
--proactor_threads (or DFLY_proactor_threads) is explicitly set, so the flag wins.
8+
"""
9+
10+
import pytest
11+
from .instance import DflyInstanceFactory
12+
13+
14+
@pytest.mark.asyncio
15+
async def test_proactor_threads_flag_is_respected(df_factory: DflyInstanceFactory):
16+
"""Starting with --proactor_threads=N must result in exactly N threads."""
17+
server = df_factory.create(proactor_threads=3)
18+
server.start()
19+
client = server.client()
20+
try:
21+
info = await client.info("server")
22+
assert int(info["thread_count"]) == 3
23+
finally:
24+
await client.aclose()
25+
server.stop()
26+
27+
28+
@pytest.mark.asyncio
29+
async def test_proactor_threads_env_var_is_respected(df_factory: DflyInstanceFactory, monkeypatch):
30+
"""DFLY_proactor_threads env var must behave identically to the CLI flag.
31+
32+
The subprocess inherits the parent environment, so setting the variable here
33+
is equivalent to setting it in a Kubernetes pod's env: section.
34+
"""
35+
monkeypatch.setenv("DFLY_proactor_threads", "2")
36+
# No proactor_threads kwarg — only the env var should drive the thread count.
37+
server = df_factory.create()
38+
server.start()
39+
client = server.client()
40+
try:
41+
info = await client.info("server")
42+
assert int(info["thread_count"]) == 2
43+
finally:
44+
await client.aclose()
45+
server.stop()
46+
47+
48+
@pytest.mark.asyncio
49+
async def test_proactor_threads_flag_overrides_env_var(
50+
df_factory: DflyInstanceFactory, monkeypatch
51+
):
52+
"""CLI flag takes priority over DFLY_proactor_threads env var.
53+
54+
ParseFlagsFromEnv() skips env vars when the flag was already set on the
55+
command line (WasPresentOnCommandLine check), so the CLI value must win.
56+
"""
57+
monkeypatch.setenv("DFLY_proactor_threads", "2")
58+
server = df_factory.create(proactor_threads=4)
59+
server.start()
60+
client = server.client()
61+
try:
62+
info = await client.info("server")
63+
assert int(info["thread_count"]) == 4
64+
finally:
65+
await client.aclose()
66+
server.stop()

0 commit comments

Comments
 (0)