|
12 | 12 |
|
13 | 13 | QDRANT_URIS = os.getenv("QDRANT_URIS", "http://localhost:6333").split(",") |
14 | 14 | QDRANT_API_KEY = os.getenv("QDRANT_API_KEY") |
15 | | -DATASET_NAME = os.getenv("DATASET_NAME", "dbpedia-openai-100K-1536-angular") |
| 15 | +DATASET_NAME = os.getenv("DATASET_NAME", "laion-small-clip") |
16 | 16 | RUNS = int(os.getenv("RUNS", 3)) |
17 | 17 | OUTPUT_FILENAME = os.getenv("OUTPUT_FILENAME", "output.json") |
18 | 18 | WORK_DIR = Path(os.getenv("WORK_DIR", Path(__file__).parent)) |
@@ -97,16 +97,28 @@ def setup_collection(self, dims: int): |
97 | 97 | ) |
98 | 98 |
|
99 | 99 | def upload_vectors(self, vectors: np.ndarray): |
100 | | - print(f"Uploading {len(vectors):,} vectors...") |
101 | | - self.primary.upload_collection( |
102 | | - collection_name=COLLECTION_NAME, |
103 | | - vectors=vectors, |
104 | | - ids=range(len(vectors)), |
105 | | - batch_size=1024, |
106 | | - parallel=16, |
107 | | - ) |
| 100 | + num_vectors = len(vectors) |
| 101 | + batch_size = 1024 |
| 102 | + print(f"Uploading {num_vectors:,} vectors in batches of {batch_size}...") |
| 103 | + |
| 104 | + for start in range(0, num_vectors, batch_size): |
| 105 | + end = min(start + batch_size, num_vectors) |
| 106 | + batch_vectors = vectors[start:end].tolist() # Convert only current batch |
| 107 | + batch_ids = list(range(start, end)) |
| 108 | + |
| 109 | + self.primary.upsert( |
| 110 | + collection_name=COLLECTION_NAME, |
| 111 | + points=models.Batch( |
| 112 | + ids=batch_ids, |
| 113 | + vectors=batch_vectors, |
| 114 | + ), |
| 115 | + wait=False, |
| 116 | + ) |
| 117 | + |
| 118 | + if (end % 10000) == 0 or end == num_vectors: |
| 119 | + print(f" Uploaded {end:,}/{num_vectors:,} vectors") |
108 | 120 |
|
109 | | - def wait_for_green(self, timeout: int = 600): |
| 121 | + def wait_for_green(self, timeout: int = 1800): |
110 | 122 | print("Waiting for green status...") |
111 | 123 | wait_time = 5.0 |
112 | 124 | total = 0 |
|
0 commit comments