Skip to content

Commit c5b4d45

Browse files
Add transfer speed benchmark (#280)
* Add shard transfer speed benchmark * Final tweaks * Add schedule * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Simplify code --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 3270671 commit c5b4d45

14 files changed

Lines changed: 677 additions & 17 deletions

File tree

.github/workflows/actions/create-inventory/action.yaml

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,13 @@ inputs:
44
hcloud_token:
55
description: "Hetzner Cloud API token"
66
required: true
7-
server_name:
8-
description: "Name of the server"
7+
server_names:
8+
description: "Space-separated list of server names"
99
required: true
10+
client_names:
11+
description: "Space-separated list of client names (optional)"
12+
required: false
13+
default: ""
1014
db_host:
1115
description: "Database host"
1216
required: true
@@ -19,7 +23,8 @@ runs:
1923
run: |
2024
export HCLOUD_TOKEN=${{ inputs.hcloud_token }}
2125
export POSTGRES_HOST=${{ inputs.db_host }}
22-
export SERVER_NAME="${{ inputs.server_name }}"
26+
27+
apk add --no-cache jq || apt-get install -y jq
2328
2429
# Download and install hcloud
2530
HCVERSION=v1.36.0
@@ -28,20 +33,42 @@ runs:
2833
mv hcloud /usr/local/bin
2934
chmod +x /usr/local/bin/hcloud
3035
31-
IP_OF_THE_SERVER=$(bash "tools/hetzner/get_public_ip.sh" "$SERVER_NAME")
32-
3336
# Create ansible inventory.ini file
34-
cat <<EOL > inventory.ini
35-
[remote_machines]
36-
benchmark-machine ansible_host=${IP_OF_THE_SERVER} ansible_user=root
37-
[db_hosts]
38-
benchmark-db ansible_host=${POSTGRES_HOST} ansible_user=root
39-
EOL
37+
echo "[remote_machines]" > inventory.ini
38+
39+
# Add each server to inventory
40+
i=0
41+
for SERVER_NAME in ${{ inputs.server_names }}; do
42+
PUBLIC_IP=$(bash "tools/hetzner/get_public_ip.sh" "$SERVER_NAME")
43+
PRIVATE_IP=$(bash "tools/hetzner/get_private_ip.sh" "$SERVER_NAME")
44+
echo "node-${i} ansible_host=${PUBLIC_IP} private_ip=${PRIVATE_IP} ansible_user=root" >> inventory.ini
45+
i=$((i + 1))
46+
done
47+
48+
# Add client_machines group if client_names is provided
49+
CLIENT_NAMES="${{ inputs.client_names }}"
50+
if [ -n "$CLIENT_NAMES" ]; then
51+
echo "" >> inventory.ini
52+
echo "[client_machines]" >> inventory.ini
53+
i=0
54+
for CLIENT_NAME in $CLIENT_NAMES; do
55+
IP=$(bash "tools/hetzner/get_public_ip.sh" "$CLIENT_NAME")
56+
echo "client-${i} ansible_host=${IP} ansible_user=root" >> inventory.ini
57+
i=$((i + 1))
58+
done
59+
fi
60+
61+
# Add db_hosts group
62+
echo "" >> inventory.ini
63+
echo "[db_hosts]" >> inventory.ini
64+
echo "benchmark-db ansible_host=${POSTGRES_HOST} ansible_user=root" >> inventory.ini
4065
4166
mv inventory.ini ansible/playbooks/inventory.ini
4267
- name: Prepare datasets.yml
4368
shell: bash
4469
run: |
45-
apk add yq
70+
if ! command -v yq &> /dev/null; then
71+
apk add --no-cache yq || apt-get install -y yq || true
72+
fi
4673
echo -e "datasets:\n" > ansible/playbooks/group_vars/datasets.yml
4774
yq -p json -o yaml datasets/datasets.json >> ansible/playbooks/group_vars/datasets.yml

.github/workflows/continuous-benchmark-hnsw.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
with:
4242
hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
4343
db_host: ${{ secrets.POSTGRES_HOST }}
44-
server_name: "benchmark-server-1"
44+
server_names: "benchmark-server-1"
4545
- name: Run bench
4646
id: hnsw-indexing-update
4747
run: |
@@ -64,7 +64,7 @@ jobs:
6464
with:
6565
hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
6666
db_host: ${{ secrets.POSTGRES_HOST }}
67-
server_name: "benchmark-server-1"
67+
server_names: "benchmark-server-1"
6868
- name: Run bench
6969
id: hnsw-indexing-transform
7070
run: |
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
name: Continuous Benchmark Shard Transfer Speed
2+
3+
on:
4+
workflow_dispatch:  # manual trigger; the inputs below feed the workflow-level env block
5+
inputs:
6+
dataset_name:
7+
description: 'Dataset name'
8+
default: 'h-and-m-2048-angular-filters'
9+
type: string
10+
qdrant_versions:  # each entry must start with docker/ or ghcr/ (checked in the runBenchmark job)
11+
description: 'Comma-separated versions to compare (ghcr/dev, docker/master, docker/v1.13.0). Can be just single version.'
12+
default: 'ghcr/dev,docker/master'
13+
type: string
14+
cluster_nodes:  # passed as a string; converted with int() in the generateMatrix job
15+
description: 'Number of cluster nodes'
16+
default: '2'
17+
type: string
18+
region:
19+
description: 'Hetzner region'
20+
default: 'fsn1'
21+
type: string
22+
server_type:  # machine type for the node-* machines
23+
description: 'Hetzner server type'
24+
default: 'ccx13'
25+
type: string
26+
client_type:  # machine type for the single client machine
27+
description: 'Hetzner client type'
28+
default: 'cx33'
29+
type: string
30+
schedule:  # scheduled runs fall back to the `||` defaults in the env block
31+
- cron: "0 4 * * *" # Daily at 4am UTC
32+
33+
concurrency:
34+
group: hetzner-machines
35+
36+
env:
37+
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
38+
# Fallbacks for the schedule trigger (inputs.* is empty unless the run was started via workflow_dispatch)
39+
SERVER_TYPE: ${{ inputs.server_type || 'ccx13' }}
40+
CLIENT_TYPE: ${{ inputs.client_type || 'cx33' }}
41+
REGION: ${{ inputs.region || 'fsn1' }}
42+
DATASET_NAME: ${{ inputs.dataset_name || 'h-and-m-2048-angular-filters' }}
43+
QDRANT_VERSIONS: ${{ inputs.qdrant_versions || 'ghcr/dev,docker/master' }}
44+
CLUSTER_NODES: ${{ inputs.cluster_nodes || '2' }}
45+
46+
jobs:
47+
generateMatrix:  # builds the machine matrix and the machine names shared by the later jobs
48+
name: Generate Matrix
49+
runs-on: ubuntu-latest
50+
outputs:
51+
matrix: ${{ steps.generate.outputs.matrix }}
52+
node_names: ${{ steps.generate.outputs.node_names }}
53+
client_name: ${{ steps.generate.outputs.client_name }}
54+
steps:
55+
- name: Generate machine matrix
56+
id: generate
57+
run: |
58+
RUN_ID=${{ github.run_id }}  # NOTE(review): not referenced again in this step; the Python script defines its own RUN_ID
59+
60+
python >> $GITHUB_OUTPUT << "EOF"  # script stdout is appended to the step outputs; quoted "EOF" keeps the shell from expanding the body
61+
import json
62+
import os
63+
64+
NODES = int(os.environ["CLUSTER_NODES"])
65+
RUN_ID = ${{ github.run_id }}  # filled in by GitHub expression substitution before the script runs
66+
67+
machines = []
68+
node_names = []
69+
for i in range(NODES):
70+
machines.append({
71+
"name": f"node-{i}", "suffix": f"node-{i}", "type": "SERVER_TYPE"  # "type" is a label that setupCluster maps to env.SERVER_TYPE
72+
})
73+
node_names.append(f"transfer-bench-node-{i}-{RUN_ID}")  # same pattern as server_name in setupCluster
74+
machines.append({
75+
"name": "client", "suffix": "client", "type": "CLIENT_TYPE"  # label that setupCluster maps to env.CLIENT_TYPE
76+
})
77+
78+
print(f"matrix={json.dumps({'machine': machines})}")
79+
print(f'node_names={" ".join(node_names)}')  # space-separated, the format create-inventory's server_names input describes
80+
print(f"client_name=transfer-bench-client-{RUN_ID}")
81+
EOF
82+
83+
setupCluster:  # one matrix job per machine (every node plus the client)
84+
name: Setup ${{ matrix.machine.name }}
85+
needs: generateMatrix
86+
runs-on: ubuntu-latest
87+
strategy:
88+
fail-fast: true
89+
matrix: ${{ fromJSON(needs.generateMatrix.outputs.matrix) }}  # JSON object with a single "machine" list
90+
steps:
91+
- uses: actions/checkout@v4
92+
- uses: webfactory/ssh-agent@v0.8.0
93+
with:
94+
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
95+
- name: Setup CI tools
96+
run: bash -x tools/setup_ci.sh
97+
- name: Create machine
98+
uses: ./.github/workflows/actions/create-server-with-retry
99+
with:
100+
server_name: transfer-bench-${{ matrix.machine.suffix }}-${{ github.run_id }}  # must match the names emitted by generateMatrix
101+
server_type: ${{ matrix.machine.type == 'CLIENT_TYPE' && env.CLIENT_TYPE || env.SERVER_TYPE }}  # client entry gets CLIENT_TYPE, all others SERVER_TYPE
102+
region: ${{ env.REGION }}
103+
max_retries: 2
104+
105+
runBenchmark:  # runs the transfer-speed playbook once per requested Qdrant version
106+
name: Run Transfer Benchmark
107+
needs: [generateMatrix, setupCluster]
108+
runs-on: ubuntu-latest
109+
container: alpine/ansible:2.18.1
110+
steps:
111+
- uses: actions/checkout@v4
112+
- uses: webfactory/ssh-agent@v0.8.0
113+
with:
114+
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
115+
- name: Create inventory
116+
uses: ./.github/workflows/actions/create-inventory
117+
with:
118+
hcloud_token: ${{ secrets.HCLOUD_TOKEN }}
119+
server_names: ${{ needs.generateMatrix.outputs.node_names }}
120+
client_names: ${{ needs.generateMatrix.outputs.client_name }}
121+
db_host: ${{ secrets.POSTGRES_HOST }}
122+
- name: Run benchmarks for all versions
123+
run: |
124+
echo "$QDRANT_VERSIONS" | tr ',' '\n' | while read -r QDRANT_VERSION; do  # one version per line; ansible-playbook below reads stdin from /dev/null so it cannot consume the remaining lines
125+
QDRANT_VERSION=$(echo "$QDRANT_VERSION" | xargs)  # trim surrounding whitespace
126+
[ -z "$QDRANT_VERSION" ] && continue
127+
128+
echo "=========================================="
129+
echo "Benchmarking Qdrant version: $QDRANT_VERSION"
130+
echo "=========================================="
131+
132+
case "${QDRANT_VERSION}" in
133+
docker/*)
134+
CONTAINER_REGISTRY="docker.io"
135+
VERSION=${QDRANT_VERSION#docker/}
136+
;;
137+
ghcr/*)
138+
CONTAINER_REGISTRY="ghcr.io"
139+
VERSION=${QDRANT_VERSION#ghcr/}
140+
;;
141+
*)
142+
echo "Error: unknown version ${QDRANT_VERSION}. Version name should start with 'docker/' or 'ghcr/'"
143+
exit 1
144+
;;
145+
esac
146+
147+
ansible-playbook ansible/playbooks/playbook-transfer-speed.yml \
148+
-i ansible/playbooks/inventory.ini \
149+
--extra-vars "dataset_name=${DATASET_NAME} server_registry=${CONTAINER_REGISTRY} server_version=${VERSION}" < /dev/null
150+
done
151+
env:
152+
ANSIBLE_HOST_KEY_CHECKING: "False"
153+
ANSIBLE_SSH_ARGS: "-o ServerAliveInterval=30 -o ServerAliveCountMax=10 -o ControlMaster=no"
154+
ANSIBLE_STDOUT_CALLBACK: yaml
155+
QDRANT_VERSIONS: ${{ env.QDRANT_VERSIONS }}
156+
DATASET_NAME: ${{ env.DATASET_NAME }}
157+
158+
cleanup:  # deletes every machine named by generateMatrix
159+
name: Cleanup Cluster
160+
needs: [generateMatrix, runBenchmark]
161+
if: always()  # run even when earlier jobs failed or were cancelled, so machines are not left behind
162+
runs-on: ubuntu-latest
163+
steps:
164+
- uses: actions/checkout@v4
165+
- name: Setup CI tools
166+
run: bash -x tools/setup_ci.sh
167+
env:
168+
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
169+
- name: Delete machines
170+
env:
171+
NODE_NAMES: ${{ needs.generateMatrix.outputs.node_names }}
172+
CLIENT_NAME: ${{ needs.generateMatrix.outputs.client_name }}
173+
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}
174+
run: echo $NODE_NAMES $CLIENT_NAME | xargs -r -P0 -n1 hcloud server delete  # -r: skip the delete when no names were produced (e.g. generateMatrix failed); one parallel delete per name

.github/workflows/manual-benchmarks-cascade.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ on:
4040
server_machine_type:
4141
description: "Hetzner server machine type to run the benchmarks in"
4242
default: "cpx41"
43+
44+
concurrency:
45+
group: hetzner-machines
46+
4347
env:
4448
# Common environment variables
4549
HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }}

ansible/README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,15 @@
88
Add inventory.ini in [ansible/playbooks/](playbooks) with the following content:
99
```ini
1010
[remote_machines]
11-
;note that machine's name should be benchmark-machine
12-
benchmark-machine ansible_host=${YOUR_SERVER_IP} ansible_user=${YOUR_USER}
11+
;note that machine's name should be node-*
12+
node-0 ansible_host=${YOUR_SERVER_IP} private_ip=${YOUR_SERVER_PRIVATE_IP} ansible_user=${YOUR_USER}
13+
;for multi-node benchmarks (playbook-transfer-speed.yml), add more machines here, e.g.:
14+
;node-1 ansible_host=x.x.x.x private_ip=x.x.x.x ansible_user=root
15+
16+
;optional, only required for some benchmarks, e.g. for playbook-transfer-speed.yml
17+
[client_machines]
18+
client-0 ansible_host=x.x.x.x private_ip=x.x.x.x ansible_user=root
19+
1320
[db_hosts]
1421
benchmark-db ansible_host=${YOUR_SERVER_IP} ansible_user=${YOUR_USER}
1522
```
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
services:
2+
qdrant:  # single Qdrant node started in cluster mode
3+
image: ${CONTAINER_REGISTRY}/qdrant/qdrant:${QDRANT_VERSION}  # registry and tag are taken from the environment
4+
container_name: qdrant-continuous
5+
restart: unless-stopped
6+
privileged: true
7+
ports:
8+
- "6333:6333"
9+
- "6334:6334"
10+
- "6335:6335"  # P2P port, matches QDRANT__CLUSTER__P2P__PORT below
11+
environment:
12+
- QDRANT__CLUSTER__ENABLED=true
13+
- QDRANT__CLUSTER__P2P__PORT=6335
14+
- QDRANT__LOG_LEVEL=DEBUG
15+
- QDRANT__FEATURE_FLAGS__ALL=${QDRANT__FEATURE_FLAGS__ALL:-false}  # falls back to false when the variable is unset or empty
16+
- QDRANT__STORAGE__PERFORMANCE__ASYNC_SCORER=true
17+
command: >  # with BOOTSTRAP_URI set: sleep 5 s, then start with --bootstrap; without it: start with --uri only
18+
sh -c "${BOOTSTRAP_URI:+sleep 5 && }./qdrant --uri '${NODE_URI}' ${BOOTSTRAP_URI:+--bootstrap '${BOOTSTRAP_URI}'}"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
logging_dir: "/tmp/logs"
2+
working_dir: "/tmp/experiments"
3+
4+
# Server configuration (overridden via --extra-vars from workflow)
5+
server_registry: "ghcr.io"
6+
server_version: "dev"
7+
feature_flags: "true"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
logging_dir: "/tmp/logs"
2+
working_dir: "/tmp/experiments"
3+
4+
# Benchmark parameters (can be overridden via --extra-vars)
5+
dataset_name: "laion-small-clip"
6+
runs: 3
7+
8+
# Server configuration (overridden via --extra-vars from workflow)
9+
server_registry: "ghcr.io"
10+
server_version: "dev"
11+
feature_flags: "true"

ansible/playbooks/playbook-hnsw-index.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060

6161
- name: Insert data into table
6262
ansible.builtin.shell: |
63-
result="{{ hostvars['benchmark-machine']['precision_score'] }}"
63+
result="{{ hostvars[groups['remote_machines'][0]]['precision_score'] }}"
6464
engine="{{ server.name }}-{{ server.version }}"
6565
precision_before_iteration=$(echo "$result" | grep -oP "${engine}_precision_before_iteration=\K[^,]+")
6666
precision_after_iteration=$(echo "$result" | grep -oP "${engine}_precision_after_iteration=\K[^,]+")
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
- name: Setup Qdrant cluster only  # applies the setup-qdrant-cluster role to every host in remote_machines
3+
hosts: remote_machines
4+
become: true
5+
vars_files: ["group_vars/qdrant-cluster.yml"]
6+
vars:
7+
node_uri: "http://{{ private_ip }}:6335"  # this host's own URI, built from the inventory's private_ip
8+
bootstrap_uri: "{{ '' if inventory_hostname == groups['remote_machines'][0] else 'http://' + hostvars[groups['remote_machines'][0]]['private_ip'] + ':6335' }}"  # empty on the first host; every other host points at the first host's URI
9+
roles:
10+
- setup-qdrant-cluster

0 commit comments

Comments
 (0)