Skip to content

Commit c590d85

Browse files
committed
WIP: try different machines
1 parent 96e6aa4 commit c590d85

2 files changed

Lines changed: 8 additions & 5 deletions

File tree

.github/workflows/continuous-benchmark-transfer.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ on:
1717
type: string
1818
server_type:
1919
description: 'Hetzner server type'
20-
default: 'cpx42'
20+
default: 'ccx13'
2121
type: string
2222
client_type:
2323
description: 'Hetzner client type'
24-
default: 'cpx42'
24+
default: 'cx32'
2525
type: string
2626
push:
2727
branches:

ansible/playbooks/roles/run-transfer-speed/files/shard_transfer.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ def setup_collection(self, dims: int):
8888
self.primary.create_collection(
8989
COLLECTION_NAME,
9090
vectors_config=models.VectorParams(size=dims, distance=models.Distance.COSINE, on_disk=True),
91-
# Set to 0 to disable automatic indexing during upload
92-
optimizers_config=models.OptimizersConfigDiff(indexing_threshold=0),
91+
# Disable optimization threads during upload for faster ingestion
92+
optimizers_config=models.OptimizersConfigDiff(max_optimization_threads=0),
9393
)
9494

9595
def upload_vectors(self, vectors: np.ndarray):
@@ -98,10 +98,13 @@ def upload_vectors(self, vectors: np.ndarray):
9898
collection_name=COLLECTION_NAME,
9999
vectors=vectors,
100100
ids=range(len(vectors)),
101+
batch_size=1024,
102+
parallel=16,
101103
)
104+
# Re-enable optimization after upload
102105
self.primary.update_collection(
103106
collection_name=COLLECTION_NAME,
104-
optimizer_config=models.OptimizersConfigDiff(indexing_threshold=1),
107+
optimizer_config=models.OptimizersConfigDiff(max_optimization_threads=100_000),
105108
)
106109

107110
def wait_for_green(self, timeout: int = 1800):

0 commit comments

Comments
 (0)