Skip to content

Commit fa3d8f3

Browse files
committed
parallelize docker smoke tests
1 parent 25eeac4 · commit fa3d8f3

3 files changed

Lines changed: 53 additions & 30 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
* Split lint tools into a lightweight dependency group so the CI lint step no longer installs heavy runtime dependencies
1212
* Add explicit dependencies for `backoff`, `pandas`, `psutil`, `pypdf`, and `requests` (previously only transitive via `unstructured[all-docs]`)
1313
* Pre-download NLTK models before parallel test runs to prevent race conditions
14-
* Parallelize Docker smoke tests with pytest-xdist
14+
* Parallelize Docker smoke tests by running one container per xdist worker on dedicated ports
1515
* Remove unused `ARCH` variable from Makefile
1616

1717
## 0.0.93

scripts/docker-smoke-test.sh

Lines changed: 38 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -11,24 +11,20 @@
1111

1212
set -e
1313

14-
CONTAINER_NAME=unstructured-api-smoke-test
14+
CONTAINER_NAME_PREFIX=unstructured-api-smoke-test
1515
CONTAINER_NAME_PARALLEL=unstructured-api-smoke-test-parallel
1616
PIPELINE_FAMILY=${PIPELINE_FAMILY:-"general"}
1717
DOCKER_IMAGE="${DOCKER_IMAGE:-pipeline-family-${PIPELINE_FAMILY}-dev:latest}"
1818
SKIP_INFERENCE_TESTS="${SKIP_INFERENCE_TESTS:-false}"
19+
NUM_WORKERS="${NUM_WORKERS:-4}"
20+
BASE_PORT=8000
1921

2022
start_container() {
23+
local port=$1
24+
local name=$2
25+
local use_parallel_mode=${3:-false}
2126

22-
port=$1
23-
use_parallel_mode=$2
24-
25-
if [ "$use_parallel_mode" = "true" ]; then
26-
name=$CONTAINER_NAME_PARALLEL
27-
else
28-
name=$CONTAINER_NAME
29-
fi
30-
31-
echo Starting container "$name"
27+
echo Starting container "$name" on port "$port"
3228
docker run --platform "$DOCKER_PLATFORM" \
3329
-p "$port":"$port" \
3430
--entrypoint uvicorn \
@@ -42,8 +38,8 @@ start_container() {
4238
}
4339

4440
await_server_ready() {
45-
port=$1
46-
url=localhost:$port/healthcheck
41+
local port=$1
42+
local url=localhost:$port/healthcheck
4743

4844
# NOTE(rniko): Increasing the timeout to 120 seconds because emulated arm tests are slow to start
4945
for _ in {1..120}; do
@@ -60,37 +56,52 @@ await_server_ready() {
6056
exit 1
6157
}
6258

63-
stop_container() {
64-
echo Stopping container "$CONTAINER_NAME"
65-
# Note (austin) - if you're getting an error from the api, try dumping the logs
66-
# docker logs $CONTAINER_NAME 2> docker_logs.txt
67-
docker stop "$CONTAINER_NAME" 2> /dev/null || true
59+
stop_all_containers() {
60+
for i in $(seq 0 $((NUM_WORKERS-1))); do
61+
local name="${CONTAINER_NAME_PREFIX}-${i}"
62+
echo Stopping container "$name"
63+
docker stop "$name" 2> /dev/null || true
64+
done
6865

6966
echo Stopping container "$CONTAINER_NAME_PARALLEL"
7067
docker stop "$CONTAINER_NAME_PARALLEL" 2> /dev/null || true
7168
}
7269

73-
# Always clean up the container
74-
trap stop_container EXIT
70+
# Always clean up all containers
71+
trap stop_all_containers EXIT
72+
73+
#######################
74+
# Start worker containers
75+
#######################
76+
for i in $(seq 0 $((NUM_WORKERS-1))); do
77+
port=$((BASE_PORT + i))
78+
start_container "$port" "${CONTAINER_NAME_PREFIX}-${i}" "false"
79+
done
7580

76-
start_container 8000 "false"
77-
await_server_ready 8000
81+
for i in $(seq 0 $((NUM_WORKERS-1))); do
82+
port=$((BASE_PORT + i))
83+
await_server_ready "$port"
84+
done
7885

7986
#######################
8087
# Smoke Tests
8188
#######################
82-
echo Running smoke tests with SKIP_INFERENCE_TESTS: "$SKIP_INFERENCE_TESTS"
83-
PYTHONPATH=. SKIP_INFERENCE_TESTS=$SKIP_INFERENCE_TESTS uv run pytest -n auto -vv scripts/smoketest.py
89+
echo "Running smoke tests with SKIP_INFERENCE_TESTS: $SKIP_INFERENCE_TESTS, NUM_WORKERS: $NUM_WORKERS"
90+
PYTHONPATH=. SKIP_INFERENCE_TESTS=$SKIP_INFERENCE_TESTS SMOKE_TEST_BASE_PORT=$BASE_PORT \
91+
uv run pytest -n "$NUM_WORKERS" -vv scripts/smoketest.py
8492

8593
#######################
8694
# Test parallel vs single mode
8795
#######################
8896
if ! $SKIP_INFERENCE_TESTS; then
89-
start_container 9000 true
90-
await_server_ready 9000
97+
# Reuse the first container on BASE_PORT for single mode,
98+
# start a new one for parallel mode on a non-conflicting port
99+
parallel_port=$((BASE_PORT + NUM_WORKERS))
100+
start_container "$parallel_port" "$CONTAINER_NAME_PARALLEL" "true"
101+
await_server_ready "$parallel_port"
91102

92103
echo Running parallel mode test
93-
./scripts/parallel-mode-test.sh localhost:8000 localhost:9000
104+
./scripts/parallel-mode-test.sh "localhost:$BASE_PORT" "localhost:$parallel_port"
94105
fi
95106

96107
result=$?

scripts/smoketest.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,23 @@
1111
import pytest
1212
import requests
1313

14-
API_URL = "http://localhost:8000/general/v0/general"
1514
# NOTE(rniko): Skip inference tests if we're running on an emulated architecture
1615
skip_inference_tests = os.getenv("SKIP_INFERENCE_TESTS", "").lower() in {"true", "yes", "y", "1"}
1716

1817

18+
def _get_api_url() -> str:
19+
"""Determine the API URL for the current xdist worker.
20+
21+
Each pytest-xdist worker (gw0, gw1, ...) is assigned its own container
22+
on a dedicated port (base_port + worker_number). When running without
23+
xdist, defaults to base_port.
24+
"""
25+
base_port = int(os.environ.get("SMOKE_TEST_BASE_PORT", "8000"))
26+
worker_id = os.environ.get("PYTEST_XDIST_WORKER", "gw0")
27+
worker_num = int(worker_id.replace("gw", ""))
28+
return f"http://localhost:{base_port + worker_num}/general/v0/general"
29+
30+
1931
def send_document(
2032
filenames: List[str],
2133
filenames_gzipped: Optional[List[str]] = None,
@@ -42,7 +54,7 @@ def send_document(
4254
options["gz_uncompressed_content_type"] = uncompressed_content_type
4355

4456
return requests.post(
45-
API_URL,
57+
_get_api_url(),
4658
files=files,
4759
data=options,
4860
)

0 commit comments

Comments
 (0)