Skip to content

Commit e719ea8

Browse files
committed
WIP: debug
1 parent 39eb4de commit e719ea8

7 files changed

Lines changed: 36 additions & 26 deletions

File tree

.github/workflows/actions/create-inventory/action.yaml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,9 @@ runs:
3838
i=0
3939
IFS=',' read -ra SERVERS <<< "${{ inputs.server_names }}"
4040
for SERVER_NAME in "${SERVERS[@]}"; do
41-
IP=$(bash "tools/hetzner/get_public_ip.sh" "$SERVER_NAME")
42-
echo "node-${i} ansible_host=${IP} ansible_user=root" >> inventory.ini
41+
PUBLIC_IP=$(bash "tools/hetzner/get_public_ip.sh" "$SERVER_NAME")
42+
PRIVATE_IP=$(bash "tools/hetzner/get_private_ip.sh" "$SERVER_NAME")
43+
echo "node-${i} ansible_host=${PUBLIC_IP} private_ip=${PRIVATE_IP} ansible_user=root" >> inventory.ini
4344
i=$((i + 1))
4445
done
4546

.github/workflows/continuous-benchmark-transfer.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,11 @@ on:
55
inputs:
66
dataset_name:
77
description: 'Dataset name'
8-
default: 'dbpedia-openai-100K-1536-angular'
8+
default: 'laion-small-clip'
99
type: string
1010
qdrant_versions:
1111
description: 'Comma-separated versions (ghcr/dev, docker/master, docker/v1.13.0)'
12-
default: 'ghcr/dev'
12+
default: 'ghcr/dev,docker/master'
1313
type: string
1414
region:
1515
description: 'Hetzner region'
@@ -38,8 +38,8 @@ env:
3838
SERVER_TYPE: ${{ inputs.server_type || 'ccx13' }}
3939
CLIENT_TYPE: ${{ inputs.client_type || 'cx33' }}
4040
REGION: ${{ inputs.region || 'fsn1' }}
41-
DATASET_NAME: ${{ inputs.dataset_name || 'dbpedia-openai-100K-1536-angular' }}
42-
QDRANT_VERSIONS: ${{ inputs.qdrant_versions || 'ghcr/dev' }}
41+
DATASET_NAME: ${{ inputs.dataset_name || 'laion-small-clip' }}
42+
QDRANT_VERSIONS: ${{ inputs.qdrant_versions || 'ghcr/dev,docker/master' }}
4343

4444
jobs:
4545
setupCluster:

ansible/playbooks/files/transfer-speed/docker-compose-cluster.yaml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ services:
33
image: ${CONTAINER_REGISTRY}/qdrant/qdrant:${QDRANT_VERSION}
44
container_name: qdrant-continuous
55
restart: unless-stopped
6+
privileged: true
67
ports:
78
- "6333:6333"
89
- "6334:6334"
@@ -13,7 +14,5 @@ services:
1314
- QDRANT__LOG_LEVEL=DEBUG
1415
- QDRANT__FEATURE_FLAGS__ALL=${QDRANT__FEATURE_FLAGS__ALL:-false}
1516
- QDRANT__STORAGE__PERFORMANCE__ASYNC_SCORER=true
16-
volumes:
17-
- ./qdrant_data:/qdrant/storage
1817
command: >
1918
sh -c "${BOOTSTRAP_URI:+sleep 5 && }./qdrant --uri '${NODE_URI}' ${BOOTSTRAP_URI:+--bootstrap '${BOOTSTRAP_URI}'}"

ansible/playbooks/group_vars/transfer-speed.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ logging_dir: "/tmp/logs"
22
working_dir: "/tmp/experiments"
33

44
# Benchmark parameters (can be overridden via --extra-vars)
5-
dataset_name: "dbpedia-openai-1M-1536-angular"
5+
dataset_name: "laion-small-clip"
66
runs: 3
77

88
# Server configuration (overridden via --extra-vars from workflow)

ansible/playbooks/playbook-transfer-speed.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88

99
- name: Set cluster URIs
1010
set_fact:
11-
node_uri: "http://{{ ansible_host }}:6335"
12-
bootstrap_uri: "{{ '' if inventory_hostname == groups['remote_machines'][0] else 'http://' + hostvars[groups['remote_machines'][0]]['ansible_host'] + ':6335' }}"
11+
node_uri: "http://{{ private_ip }}:6335"
12+
bootstrap_uri: "{{ '' if inventory_hostname == groups['remote_machines'][0] else 'http://' + hostvars[groups['remote_machines'][0]]['private_ip'] + ':6335' }}"
1313

1414
- name: Setup Qdrant cluster node
1515
include_role:
@@ -19,7 +19,7 @@
1919
hosts: client_machines
2020
become: yes
2121
vars:
22-
qdrant_uris: "{{ groups['remote_machines'] | map('extract', hostvars, 'ansible_host') | map('regex_replace', '^(.*)$', 'http://\\1:6333') | join(',') }}"
22+
qdrant_uris: "{{ groups['remote_machines'] | map('extract', hostvars, 'private_ip') | map('regex_replace', '^(.*)$', 'http://\\1:6333') | join(',') }}"
2323
tasks:
2424
- name: Load variables
2525
include_vars: "group_vars/transfer-speed.yml"

ansible/playbooks/roles/run-transfer-speed/files/shard_transfer.py

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
QDRANT_URIS = os.getenv("QDRANT_URIS", "http://localhost:6333").split(",")
1414
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
15-
DATASET_NAME = os.getenv("DATASET_NAME", "dbpedia-openai-100K-1536-angular")
15+
DATASET_NAME = os.getenv("DATASET_NAME", "laion-small-clip")
1616
RUNS = int(os.getenv("RUNS", 3))
1717
OUTPUT_FILENAME = os.getenv("OUTPUT_FILENAME", "output.json")
1818
WORK_DIR = Path(os.getenv("WORK_DIR", Path(__file__).parent))
@@ -97,16 +97,28 @@ def setup_collection(self, dims: int):
9797
)
9898

9999
def upload_vectors(self, vectors: np.ndarray):
100-
print(f"Uploading {len(vectors):,} vectors...")
101-
self.primary.upload_collection(
102-
collection_name=COLLECTION_NAME,
103-
vectors=vectors,
104-
ids=range(len(vectors)),
105-
batch_size=1024,
106-
parallel=16,
107-
)
100+
num_vectors = len(vectors)
101+
batch_size = 1024
102+
print(f"Uploading {num_vectors:,} vectors in batches of {batch_size}...")
103+
104+
for start in range(0, num_vectors, batch_size):
105+
end = min(start + batch_size, num_vectors)
106+
batch_vectors = vectors[start:end].tolist() # Convert only current batch
107+
batch_ids = list(range(start, end))
108+
109+
self.primary.upsert(
110+
collection_name=COLLECTION_NAME,
111+
points=models.Batch(
112+
ids=batch_ids,
113+
vectors=batch_vectors,
114+
),
115+
wait=False,
116+
)
117+
118+
if (end % 10000) == 0 or end == num_vectors:
119+
print(f" Uploaded {end:,}/{num_vectors:,} vectors")
108120

109-
def wait_for_green(self, timeout: int = 600):
121+
def wait_for_green(self, timeout: int = 1800):
110122
print("Waiting for green status...")
111123
wait_time = 5.0
112124
total = 0

ansible/playbooks/roles/setup-qdrant-cluster/tasks/main.yml

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,10 @@
1616

1717
- name: Stop existing containers
1818
ansible.builtin.shell: |
19-
cd {{ working_dir }} && docker compose down 2>/dev/null || true
19+
cd {{ working_dir }} && docker compose down -v 2>/dev/null || true
2020
docker rm -f qdrant-continuous 2>/dev/null || true
21-
# Kill any container using port 6333
2221
docker ps -q --filter "publish=6333" | xargs -r docker rm -f 2>/dev/null || true
23-
# Also remove the data directory to ensure clean state
24-
rm -rf {{ working_dir }}/qdrant_data || true
22+
docker volume rm qdrant_storage 2>/dev/null || true
2523
ignore_errors: yes
2624

2725
- name: Remove old image to ensure fresh pull

0 commit comments

Comments
 (0)