Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,82 @@ jobs:
name: regression_logs
path: /tmp/failed/*

- name: Upload dragonfly binary for cgroup test
if: matrix.container == 'ubuntu-dev:24' && matrix.build-type == 'Release' && matrix.sanitizers == 'NoSanitizers' && matrix.compiler.cxx == 'g++'
uses: actions/upload-artifact@v7
with:
name: dragonfly-binary
path: ${{ github.workspace }}/build/dragonfly
retention-days: 1

cgroup-thread-detection:
runs-on: ubuntu-latest
needs: [build]
steps:
- uses: actions/checkout@v6

- uses: actions/download-artifact@v7
with:
name: dragonfly-binary
path: build

- name: Install redis-tools
run: sudo apt-get update && sudo apt-get install -y redis-tools

- name: Prepare cgroup test image
run: |
chmod +x build/dragonfly
# Verify the binary's runtime dependencies and install them in a local image.
# The binary is built in ubuntu-dev:24 and links dynamically against Boost,
# which is not present in the minimal ubuntu:24.04 image.
echo "Runtime deps of dragonfly binary:"
ldd build/dragonfly
docker build -t df-cgroup-test - <<'EOF'
FROM ubuntu:24.04
RUN apt-get update -qq && \
apt-get install -y --no-install-recommends libboost-context1.83.0 libssl3 && \
rm -rf /var/lib/apt/lists/*
EOF

- name: Test cgroup CPU auto-detection
run: |
wait_for_server() {
local port=$1
local cid=$2
for i in $(seq 1 30); do
sleep 1
redis-cli -p "$port" PING 2>/dev/null | grep -q PONG && return 0
done
echo "ERROR: dragonfly on port $port did not start within 30s"
docker logs "$cid" 2>&1 || true
docker rm -f "$cid" 2>/dev/null || true
return 1
}

# Scenario 1: --cpus=2, no flag -> should auto-detect 2 threads
CID=$(docker run --rm -d --cpus=2 --security-opt seccomp=unconfined \
-v "$PWD/build/dragonfly:/dragonfly" -p 16380:6379 df-cgroup-test \
/dragonfly --port 6379 --maxmemory 1G --dbfilename "" --noversion_check)

wait_for_server 16380 "$CID"

THREADS=$(redis-cli -p 16380 INFO server | grep -oP 'thread_count:\K\d+')
docker rm -f "$CID" 2>/dev/null || true
echo "Scenario 1: expected 2, got $THREADS"
[ "$THREADS" -eq 2 ]

# Scenario 2: --cpus=2 + --proactor_threads=4 -> flag must win
CID=$(docker run --rm -d --cpus=2 --security-opt seccomp=unconfined \
-v "$PWD/build/dragonfly:/dragonfly" -p 16381:6379 df-cgroup-test \
/dragonfly --port 6379 --proactor_threads 4 --maxmemory 1G --dbfilename "" --noversion_check)

wait_for_server 16381 "$CID"

THREADS=$(redis-cli -p 16381 INFO server | grep -oP 'thread_count:\K\d+')
docker rm -f "$CID" 2>/dev/null || true
echo "Scenario 2: expected 4, got $THREADS"
[ "$THREADS" -eq 4 ]

lint-test-chart:
runs-on: ubuntu-latest
needs: [build]
Expand Down
9 changes: 9 additions & 0 deletions src/server/dfly_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ ABSL_DECLARE_FLAG(uint32_t, memcached_port);
ABSL_DECLARE_FLAG(uint16_t, admin_port);
ABSL_DECLARE_FLAG(std::string, admin_bind);
ABSL_DECLARE_FLAG(strings::MemoryBytesFlag, maxmemory);
ABSL_DECLARE_FLAG(uint32_t, proactor_threads);

ABSL_FLAG(string, bind, "",
"Bind address. If empty - binds on all interfaces. "
Expand Down Expand Up @@ -1080,6 +1081,14 @@ Usage: dragonfly [FLAGS]

#ifdef __linux__
UpdateResourceLimitsIfInsideContainer(&mem_info, &max_available_threads);
// If --proactor_threads (or DFLY_proactor_threads env var) was explicitly set by the user,
// honor it over the cgroup-derived CPU limit. The flag defaults to 0, so any non-zero value
// means the user explicitly requested a specific thread count.
if (absl::GetFlag(FLAGS_proactor_threads) > 0) {
LOG(INFO) << "Using proactor_threads=" << absl::GetFlag(FLAGS_proactor_threads)
<< " (overriding cgroup-derived " << max_available_threads << ")";
max_available_threads = 0; // causes ProactorPool to use FLAGS_proactor_threads
}
#endif

if (mem_info.swap_total != 0)
Expand Down
66 changes: 66 additions & 0 deletions tests/dragonfly/proactor_threads_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
Tests that --proactor_threads is respected even inside containers with CPU limits.

Background: UpdateResourceLimitsIfInsideContainer() reads the cgroup CPU quota and
passes it as pool_size to ProactorPool. When pool_size != 0, helio's ProactorPool
ignores FLAGS_proactor_threads entirely. The fix resets pool_size to 0 when
--proactor_threads (or DFLY_proactor_threads) is explicitly set, so the flag wins.
"""

import pytest
from .instance import DflyInstanceFactory


@pytest.mark.asyncio
async def test_proactor_threads_flag_is_respected(df_factory: DflyInstanceFactory):
"""Starting with --proactor_threads=N must result in exactly N threads."""
server = df_factory.create(proactor_threads=3)
server.start()
client = server.client()
try:
info = await client.info("server")
assert int(info["thread_count"]) == 3
finally:
await client.aclose()
server.stop()


@pytest.mark.asyncio
async def test_proactor_threads_env_var_is_respected(df_factory: DflyInstanceFactory, monkeypatch):
"""DFLY_proactor_threads env var must behave identically to the CLI flag.

The subprocess inherits the parent environment, so setting the variable here
is equivalent to setting it in a Kubernetes pod's env: section.
"""
monkeypatch.setenv("DFLY_proactor_threads", "2")
# No proactor_threads kwarg — only the env var should drive the thread count.
server = df_factory.create()
server.start()
client = server.client()
try:
info = await client.info("server")
assert int(info["thread_count"]) == 2
finally:
await client.aclose()
server.stop()


@pytest.mark.asyncio
async def test_proactor_threads_flag_overrides_env_var(
df_factory: DflyInstanceFactory, monkeypatch
):
"""CLI flag takes priority over DFLY_proactor_threads env var.

ParseFlagsFromEnv() skips env vars when the flag was already set on the
command line (WasPresentOnCommandLine check), so the CLI value must win.
"""
monkeypatch.setenv("DFLY_proactor_threads", "2")
server = df_factory.create(proactor_threads=4)
server.start()
client = server.client()
try:
info = await client.info("server")
assert int(info["thread_count"]) == 4
finally:
await client.aclose()
server.stop()
Loading