Skip to content

Commit 4ad6f28

Browse files
committed
Revert the parallel test attempt
1 parent 1caeb3d commit 4ad6f28

2 files changed

Lines changed: 41 additions & 76 deletions

File tree

CHANGELOG.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
* Add explicit dependencies for `backoff`, `pandas`, `psutil`, `pypdf`, and `requests` (previously only transitive via `unstructured[all-docs]`)
1212
* Pre-download NLTK models before parallel test runs to prevent race conditions
1313
* Parallelize Docker smoke tests by running one container per xdist worker on dedicated ports
14-
* Parallelize parallel-mode curl tests with background processes
1514
* Pin uv version in Dockerfile for reproducible builds
1615
* Remove `py3.12-pip` from Dockerfile (unused since uv migration)
1716
* Drop mypy from CI (ruff covers linting sufficiently)

scripts/parallel-mode-test.sh

Lines changed: 41 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,16 @@
11
#!/usr/bin/env bash
22

33
# parallel-mode-test.sh
4-
# Run each test case against two instances of the api (single mode vs parallel mode)
5-
# and diff the outputs to make sure parallel mode does not alter the response.
6-
# All test cases run concurrently for speed.
4+
# Iterate a list of curl commands, and run each one against two instances of the api.
5+
# The smoke test will start one container with parallel mode and one without, and
6+
# diff the two outputs to make sure parallel mode does not alter the response.
77
# Note the filepaths assume you ran this from the top level
88

99
# shellcheck disable=SC2317 # Shellcheck complains that trap functions are unreachable...
1010

11-
set -e
12-
1311
base_url_1=$1
1412
base_url_2=$2
1513

16-
tmpdir=$(mktemp -d)
17-
trap 'rm -rf "$tmpdir"' EXIT
18-
1914
declare -a curl_params=(
2015
"-F files=@sample-docs/layout-parser-paper.pdf -F 'strategy=fast'"
2116
"-F files=@sample-docs/layout-parser-paper.pdf -F 'strategy=auto"
@@ -26,71 +21,42 @@ declare -a curl_params=(
2621
"-F files=@sample-docs/layout-parser-paper.pdf -F 'hi_res_model_name=yolox'"
2722
)
2823

29-
run_test_case() {
30-
local idx=$1
31-
local params=$2
32-
local single_output="$tmpdir/single_${idx}.json"
33-
local parallel_output="$tmpdir/parallel_${idx}.json"
34-
35-
curl_command="curl $base_url_1/general/v0/general $params"
36-
echo "[$idx] Testing: $curl_command"
37-
38-
# Run in single mode
39-
# Note(austin): Parallel mode screws up hierarchy! While we deal with that,
40-
# let's ignore parent_id fields in the results
41-
$curl_command 2> /dev/null | jq -S 'del(..|.parent_id?)' > "$single_output"
42-
43-
# Stop if curl didn't work
44-
if [ ! -s "$single_output" ]; then
45-
echo "[$idx] Single mode command failed!"
46-
$curl_command
47-
return 1
48-
fi
49-
50-
# Run in parallel mode
51-
curl_command="curl $base_url_2/general/v0/general $params"
52-
$curl_command 2> /dev/null | jq -S 'del(..|.parent_id?)' > "$parallel_output"
53-
54-
# Stop if curl didn't work
55-
if [ ! -s "$parallel_output" ]; then
56-
echo "[$idx] Parallel mode command failed!"
57-
$curl_command
58-
return 1
59-
fi
60-
61-
local original_length
62-
local parallel_length
63-
original_length=$(jq 'length' "$single_output")
64-
parallel_length=$(jq 'length' "$parallel_output")
65-
66-
if [[ "$original_length" != "$parallel_length" ]]; then
67-
echo "[$idx] Parallel mode returned a different number of elements! ($original_length vs $parallel_length)"
68-
echo "[$idx] Params: $params"
69-
return 1
70-
fi
71-
72-
echo "[$idx] PASSED ($original_length elements)"
73-
}
74-
75-
# Launch all test cases concurrently
76-
pids=()
77-
for i in "${!curl_params[@]}"; do
78-
run_test_case "$i" "${curl_params[$i]}" &
79-
pids+=($!)
24+
for params in "${curl_params[@]}"
25+
do
26+
curl_command="curl $base_url_1/general/v0/general $params"
27+
echo Testing: "$curl_command"
28+
29+
# Run in single mode
30+
# Note(austin): Parallel mode screws up hierarchy! While we deal with that,
31+
# let's ignore parent_id fields in the results
32+
$curl_command 2> /dev/null | jq -S 'del(..|.parent_id?)' > output.json
33+
original_length=$(jq 'length' output.json)
34+
35+
# Stop if curl didn't work
36+
if [ ! -s output.json ]; then
37+
echo Command failed!
38+
$curl_command
39+
exit 1
40+
fi
41+
42+
# Run in parallel mode
43+
curl_command="curl $base_url_2/general/v0/general $params"
44+
$curl_command 2> /dev/null | jq -S 'del(..|.parent_id?)' > parallel_output.json
45+
parallel_length=$(jq 'length' parallel_output.json)
46+
47+
# Stop if curl didn't work
48+
if [ ! -s parallel_output.json ]; then
49+
echo Command failed!
50+
$curl_command
51+
exit 1
52+
fi
53+
54+
if ! [[ "$original_length" == "$parallel_length" ]]; then
55+
echo Parallel mode returned a different number of elements!
56+
echo Params: "$params"
57+
exit 1
58+
fi
59+
60+
rm -f output.json parallel_output.json
61+
echo
8062
done
81-
82-
# Wait for all and collect failures
83-
failed=0
84-
for i in "${!pids[@]}"; do
85-
if ! wait "${pids[$i]}"; then
86-
echo "Test case $i failed!"
87-
failed=1
88-
fi
89-
done
90-
91-
if [ "$failed" -ne 0 ]; then
92-
echo "Some parallel mode tests failed!"
93-
exit 1
94-
fi
95-
96-
echo "All parallel mode tests passed!"

0 commit comments

Comments
 (0)