From f7c2807d950a8961c53560854282d317bd4209e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20St=C3=A1rek?= Date: Wed, 8 Apr 2026 10:41:36 +0200 Subject: [PATCH] fix: proactor_threads is respected in k8s --- .github/workflows/ci.yml | 76 ++++++++++++++++++++++++ src/server/dfly_main.cc | 9 +++ tests/dragonfly/proactor_threads_test.py | 66 ++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 tests/dragonfly/proactor_threads_test.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 251c0d285799..45f43b93ca2f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -187,6 +187,82 @@ jobs: name: regression_logs path: /tmp/failed/* + - name: Upload dragonfly binary for cgroup test + if: matrix.container == 'ubuntu-dev:24' && matrix.build-type == 'Release' && matrix.sanitizers == 'NoSanitizers' && matrix.compiler.cxx == 'g++' + uses: actions/upload-artifact@v7 + with: + name: dragonfly-binary + path: ${{ github.workspace }}/build/dragonfly + retention-days: 1 + + cgroup-thread-detection: + runs-on: ubuntu-latest + needs: [build] + steps: + - uses: actions/checkout@v6 + + - uses: actions/download-artifact@v7 + with: + name: dragonfly-binary + path: build + + - name: Install redis-tools + run: sudo apt-get update && sudo apt-get install -y redis-tools + + - name: Prepare cgroup test image + run: | + chmod +x build/dragonfly + # Verify the binary's runtime dependencies and install them in a local image. + # The binary is built in ubuntu-dev:24 and links dynamically against Boost, + # which is not present in the minimal ubuntu:24.04 image. + echo "Runtime deps of dragonfly binary:" + ldd build/dragonfly + docker build -t df-cgroup-test - <<'EOF' + FROM ubuntu:24.04 + RUN apt-get update -qq && \ + apt-get install -y --no-install-recommends libboost-context1.83.0 libssl3 && \ + rm -rf /var/lib/apt/lists/* + EOF + + - name: Test cgroup CPU auto-detection + run: | + wait_for_server() { + local port=$1 + local cid=$2 + for i in $(seq 1 30); do + sleep 1 + redis-cli -p "$port" PING 2>/dev/null | grep -q PONG && return 0 + done + echo "ERROR: dragonfly on port $port did not start within 30s" + docker logs "$cid" 2>&1 || true + docker rm -f "$cid" 2>/dev/null || true + return 1 + } + + # Scenario 1: --cpus=2, no flag -> should auto-detect 2 threads + CID=$(docker run --rm -d --cpus=2 --security-opt seccomp=unconfined \ + -v "$PWD/build/dragonfly:/dragonfly" -p 16380:6379 df-cgroup-test \ + /dragonfly --port 6379 --maxmemory 1G --dbfilename "" --noversion_check) + + wait_for_server 16380 "$CID" + + THREADS=$(redis-cli -p 16380 INFO server | grep -oP 'thread_count:\K\d+') + docker rm -f "$CID" 2>/dev/null || true + echo "Scenario 1: expected 2, got $THREADS" + [ "$THREADS" -eq 2 ] + + # Scenario 2: --cpus=2 + --proactor_threads=4 -> flag must win + CID=$(docker run --rm -d --cpus=2 --security-opt seccomp=unconfined \ + -v "$PWD/build/dragonfly:/dragonfly" -p 16381:6379 df-cgroup-test \ + /dragonfly --port 6379 --proactor_threads 4 --maxmemory 1G --dbfilename "" --noversion_check) + + wait_for_server 16381 "$CID" + + THREADS=$(redis-cli -p 16381 INFO server | grep -oP 'thread_count:\K\d+') + docker rm -f "$CID" 2>/dev/null || true + echo "Scenario 2: expected 4, got $THREADS" + [ "$THREADS" -eq 4 ] + lint-test-chart: runs-on: ubuntu-latest needs: [build] diff --git a/src/server/dfly_main.cc b/src/server/dfly_main.cc index c7eb111048d1..8eee0e3ce296 100644 --- a/src/server/dfly_main.cc +++ b/src/server/dfly_main.cc @@ -82,6 +82,7 @@ ABSL_DECLARE_FLAG(uint32_t, memcached_port); ABSL_DECLARE_FLAG(uint16_t, admin_port); ABSL_DECLARE_FLAG(std::string, admin_bind); ABSL_DECLARE_FLAG(strings::MemoryBytesFlag, maxmemory); +ABSL_DECLARE_FLAG(uint32_t, proactor_threads); ABSL_FLAG(string, bind, "", "Bind address. If empty - binds on all interfaces. " @@ -1080,6 +1081,14 @@ Usage: dragonfly [FLAGS] #ifdef __linux__ UpdateResourceLimitsIfInsideContainer(&mem_info, &max_available_threads); + // If --proactor_threads (or DFLY_proactor_threads env var) was explicitly set by the user, + // honor it over the cgroup-derived CPU limit. The flag defaults to 0, so any non-zero value + // means the user explicitly requested a specific thread count. + if (absl::GetFlag(FLAGS_proactor_threads) > 0) { + LOG(INFO) << "Using proactor_threads=" << absl::GetFlag(FLAGS_proactor_threads) + << " (overriding cgroup-derived " << max_available_threads << ")"; + max_available_threads = 0; // causes ProactorPool to use FLAGS_proactor_threads + } #endif if (mem_info.swap_total != 0) diff --git a/tests/dragonfly/proactor_threads_test.py b/tests/dragonfly/proactor_threads_test.py new file mode 100644 index 000000000000..0b9fa574eb45 --- /dev/null +++ b/tests/dragonfly/proactor_threads_test.py @@ -0,0 +1,66 @@ +""" +Tests that --proactor_threads is respected even inside containers with CPU limits. + +Background: UpdateResourceLimitsIfInsideContainer() reads the cgroup CPU quota and +passes it as pool_size to ProactorPool. When pool_size != 0, helio's ProactorPool +ignores FLAGS_proactor_threads entirely. The fix resets pool_size to 0 when +--proactor_threads (or DFLY_proactor_threads) is explicitly set, so the flag wins. +""" + +import pytest +from .instance import DflyInstanceFactory + + +@pytest.mark.asyncio +async def test_proactor_threads_flag_is_respected(df_factory: DflyInstanceFactory): + """Starting with --proactor_threads=N must result in exactly N threads.""" + server = df_factory.create(proactor_threads=3) + server.start() + client = server.client() + try: + info = await client.info("server") + assert int(info["thread_count"]) == 3 + finally: + await client.aclose() + server.stop() + + +@pytest.mark.asyncio +async def test_proactor_threads_env_var_is_respected(df_factory: DflyInstanceFactory, monkeypatch): + """DFLY_proactor_threads env var must behave identically to the CLI flag. + + The subprocess inherits the parent environment, so setting the variable here + is equivalent to setting it in a Kubernetes pod's env: section. + """ + monkeypatch.setenv("DFLY_proactor_threads", "2") + # No proactor_threads kwarg — only the env var should drive the thread count. + server = df_factory.create() + server.start() + client = server.client() + try: + info = await client.info("server") + assert int(info["thread_count"]) == 2 + finally: + await client.aclose() + server.stop() + + +@pytest.mark.asyncio +async def test_proactor_threads_flag_overrides_env_var( + df_factory: DflyInstanceFactory, monkeypatch +): + """CLI flag takes priority over DFLY_proactor_threads env var. + + ParseFlagsFromEnv() skips env vars when the flag was already set on the + command line (WasPresentOnCommandLine check), so the CLI value must win. + """ + monkeypatch.setenv("DFLY_proactor_threads", "2") + server = df_factory.create(proactor_threads=4) + server.start() + client = server.client() + try: + info = await client.info("server") + assert int(info["thread_count"]) == 4 + finally: + await client.aclose() + server.stop()