|
21 | 21 | import io.milvus.v2.common.ConsistencyLevel; |
22 | 22 | import io.milvus.v2.common.DataType; |
23 | 23 | import io.milvus.v2.common.IndexParam; |
| 24 | +import io.milvus.v2.service.collection.request.AddFieldReq; |
24 | 25 | import io.milvus.v2.service.collection.request.CreateCollectionReq; |
25 | 26 | import io.milvus.v2.service.collection.request.DescribeCollectionReq; |
26 | 27 | import io.milvus.v2.service.collection.request.LoadCollectionReq; |
@@ -275,6 +276,110 @@ public static String createNewCollection(int dim, String collectionName, DataTyp |
275 | 276 | log.info("create collection:" + collectionName); |
276 | 277 | return collectionName; |
277 | 278 | } |
| 279 | + |
| 280 | + /** |
| 281 | + * Create a new collection with Varchar primary key |
| 282 | + * |
| 283 | + * @param dim dimension of the vector field |
| 284 | + * @param collectionName collection name |
| 285 | + * @param vectorType vector data type |
| 286 | + * @return collection name |
| 287 | + */ |
| 288 | + public static String createNewCollectionWithVarcharPK(int dim, String collectionName, DataType vectorType) { |
| 289 | + if (collectionName == null || collectionName.equals("")) { |
| 290 | + collectionName = "Collection_" + GenerateUtil.getRandomString(10); |
| 291 | + } |
| 292 | + // Use Varchar as primary key |
| 293 | + CreateCollectionReq.FieldSchema fieldVarcharPK = CreateCollectionReq.FieldSchema.builder() |
| 294 | + .autoID(false) |
| 295 | + .dataType(DataType.VarChar) |
| 296 | + .isPrimaryKey(true) |
| 297 | + .name(CommonData.fieldVarchar) |
| 298 | + .maxLength(100) |
| 299 | + .build(); |
| 300 | + CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder() |
| 301 | + .dataType(io.milvus.v2.common.DataType.Int64) |
| 302 | + .isPrimaryKey(false) |
| 303 | + .name(CommonData.fieldInt64) |
| 304 | + .build(); |
| 305 | + CreateCollectionReq.FieldSchema fieldInt32 = CreateCollectionReq.FieldSchema.builder() |
| 306 | + .dataType(DataType.Int32) |
| 307 | + .name(CommonData.fieldInt32) |
| 308 | + .isPrimaryKey(false) |
| 309 | + .build(); |
| 310 | + CreateCollectionReq.FieldSchema fieldInt8 = CreateCollectionReq.FieldSchema.builder() |
| 311 | + .dataType(DataType.Int8) |
| 312 | + .name(CommonData.fieldInt8) |
| 313 | + .isPrimaryKey(false) |
| 314 | + .build(); |
| 315 | + CreateCollectionReq.FieldSchema fieldFloat = CreateCollectionReq.FieldSchema.builder() |
| 316 | + .dataType(DataType.Float) |
| 317 | + .name(CommonData.fieldFloat) |
| 318 | + .isPrimaryKey(false) |
| 319 | + .build(); |
| 320 | + CreateCollectionReq.FieldSchema fieldDouble = CreateCollectionReq.FieldSchema.builder() |
| 321 | + .dataType(DataType.Double) |
| 322 | + .name(CommonData.fieldDouble) |
| 323 | + .isPrimaryKey(false) |
| 324 | + .build(); |
| 325 | + CreateCollectionReq.FieldSchema fieldBool = CreateCollectionReq.FieldSchema.builder() |
| 326 | + .dataType(DataType.Bool) |
| 327 | + .name(CommonData.fieldBool) |
| 328 | + .isPrimaryKey(false) |
| 329 | + .build(); |
| 330 | + CreateCollectionReq.FieldSchema fieldJson = CreateCollectionReq.FieldSchema.builder() |
| 331 | + .dataType(DataType.JSON) |
| 332 | + .name(CommonData.fieldJson) |
| 333 | + .isPrimaryKey(false) |
| 334 | + .build(); |
| 335 | + CreateCollectionReq.FieldSchema fieldVector = CreateCollectionReq.FieldSchema.builder() |
| 336 | + .dataType(vectorType) |
| 337 | + .isPrimaryKey(false) |
| 338 | + .build(); |
| 339 | + if (vectorType == DataType.FloatVector) { |
| 340 | + fieldVector.setDimension(dim); |
| 341 | + fieldVector.setName(CommonData.fieldFloatVector); |
| 342 | + } |
| 343 | + if (vectorType == DataType.BinaryVector) { |
| 344 | + fieldVector.setDimension(dim); |
| 345 | + fieldVector.setName(CommonData.fieldBinaryVector); |
| 346 | + } |
| 347 | + if (vectorType == DataType.Float16Vector) { |
| 348 | + fieldVector.setDimension(dim); |
| 349 | + fieldVector.setName(CommonData.fieldFloat16Vector); |
| 350 | + } |
| 351 | + if (vectorType == DataType.BFloat16Vector) { |
| 352 | + fieldVector.setDimension(dim); |
| 353 | + fieldVector.setName(CommonData.fieldBF16Vector); |
| 354 | + } |
| 355 | + if (vectorType == DataType.SparseFloatVector) { |
| 356 | + fieldVector.setName(CommonData.fieldSparseVector); |
| 357 | + } |
| 358 | + List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>(); |
| 359 | + fieldSchemaList.add(fieldVarcharPK); |
| 360 | + fieldSchemaList.add(fieldInt64); |
| 361 | + fieldSchemaList.add(fieldInt32); |
| 362 | + fieldSchemaList.add(fieldInt8); |
| 363 | + fieldSchemaList.add(fieldFloat); |
| 364 | + fieldSchemaList.add(fieldDouble); |
| 365 | + fieldSchemaList.add(fieldBool); |
| 366 | + fieldSchemaList.add(fieldJson); |
| 367 | + fieldSchemaList.add(fieldVector); |
| 368 | + CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder() |
| 369 | + .fieldSchemaList(fieldSchemaList) |
| 370 | + .build(); |
| 371 | + CreateCollectionReq createCollectionReq = CreateCollectionReq.builder() |
| 372 | + .collectionSchema(collectionSchema) |
| 373 | + .collectionName(collectionName) |
| 374 | + .enableDynamicField(false) |
| 375 | + .description("collection with varchar primary key") |
| 376 | + .numShards(1) |
| 377 | + .build(); |
| 378 | + milvusClientV2.createCollection(createCollectionReq); |
| 379 | + log.info("create collection with varchar pk:" + collectionName); |
| 380 | + return collectionName; |
| 381 | + } |
| 382 | + |
278 | 383 | public static String createNewCollectionWithDatabase(int dim, String collectionName, DataType vectorType,String databaseName) { |
279 | 384 | if (collectionName == null || collectionName.equals("")) { |
280 | 385 | collectionName = "Collection_" + GenerateUtil.getRandomString(10); |
@@ -929,6 +1034,60 @@ public static List<JsonObject> generateDefaultData(long startId, long num, int d |
929 | 1034 | return jsonList; |
930 | 1035 | } |
931 | 1036 |
|
| 1037 | + /** |
| 1038 | + * Generate data with varchar primary key for collection created by createNewCollectionWithVarcharPK |
| 1039 | + * |
| 1040 | + * @param startId start id |
| 1041 | + * @param num number of entities to generate |
| 1042 | + * @param dim dimension of vector |
| 1043 | + * @param vectorType vector data type |
| 1044 | + * @return List of JsonObject representing the data rows |
| 1045 | + */ |
| 1046 | + public static List<JsonObject> generateDataWithVarcharPK(long startId, long num, int dim, DataType vectorType) { |
| 1047 | + List<JsonObject> jsonList = new ArrayList<>(); |
| 1048 | + Random ran = new Random(); |
| 1049 | + Gson gson = new Gson(); |
| 1050 | + for (long i = startId; i < (num + startId); i++) { |
| 1051 | + JsonObject row = new JsonObject(); |
| 1052 | + // Use varchar as primary key |
| 1053 | + row.addProperty(CommonData.fieldVarchar, "Str" + i); |
| 1054 | + row.addProperty(CommonData.fieldInt64, i); |
| 1055 | + row.addProperty(CommonData.fieldInt32, (int) i % 32767); |
| 1056 | + row.addProperty(CommonData.fieldInt8, (short) i % 127); |
| 1057 | + row.addProperty(CommonData.fieldDouble, (double) i); |
| 1058 | + row.addProperty(CommonData.fieldBool, i % 2 == 0); |
| 1059 | + row.addProperty(CommonData.fieldFloat, (float) i); |
| 1060 | + // Generate vector based on type |
| 1061 | + if (vectorType == DataType.FloatVector) { |
| 1062 | + List<Float> vector = new ArrayList<>(); |
| 1063 | + for (int k = 0; k < dim; ++k) { |
| 1064 | + vector.add(ran.nextFloat()); |
| 1065 | + } |
| 1066 | + row.add(CommonData.fieldFloatVector, gson.toJsonTree(vector)); |
| 1067 | + } |
| 1068 | + if (vectorType == DataType.BinaryVector) { |
| 1069 | + row.add(CommonData.fieldBinaryVector, gson.toJsonTree(generateBinaryVector(dim).array())); |
| 1070 | + } |
| 1071 | + if (vectorType == DataType.Float16Vector) { |
| 1072 | + row.add(CommonData.fieldFloat16Vector, gson.toJsonTree(generateFloat16Vector(dim).array())); |
| 1073 | + } |
| 1074 | + if (vectorType == DataType.BFloat16Vector) { |
| 1075 | + row.add(CommonData.fieldBF16Vector, gson.toJsonTree(generateBF16Vector(dim).array())); |
| 1076 | + } |
| 1077 | + if (vectorType == DataType.SparseFloatVector) { |
| 1078 | + row.add(CommonData.fieldSparseVector, gson.toJsonTree(generateSparseVector(dim))); |
| 1079 | + } |
| 1080 | + JsonObject json = new JsonObject(); |
| 1081 | + json.addProperty(CommonData.fieldInt64, (int) i % 32767); |
| 1082 | + json.addProperty(CommonData.fieldInt32, (int) i % 32767); |
| 1083 | + json.addProperty(CommonData.fieldDouble, (double) i); |
| 1084 | + json.addProperty(CommonData.fieldFloat, (float) i); |
| 1085 | + row.add(CommonData.fieldJson, json); |
| 1086 | + jsonList.add(row); |
| 1087 | + } |
| 1088 | + return jsonList; |
| 1089 | + } |
| 1090 | + |
932 | 1091 | public static List<JsonObject> generateDefaultDataWithDynamic(long startId, long num, int dim, DataType vectorType) { |
933 | 1092 | List<JsonObject> jsonList = new ArrayList<>(); |
934 | 1093 | Random ran = new Random(); |
@@ -1975,6 +2134,192 @@ public static void multiFilesUpload(String path, List<List<String>> batchFiles) |
1975 | 2134 |
|
1976 | 2135 | } |
1977 | 2136 |
|
| 2137 | + // ==================== Struct Array Related Methods ==================== |
| 2138 | + |
| 2139 | + /** |
| 2140 | + * Create a collection schema with Struct field containing vectors |
| 2141 | + * |
| 2142 | + * @param collectionName collection name |
| 2143 | + * @param dim vector dimension |
| 2144 | + * @return collection name |
| 2145 | + */ |
| 2146 | + public static String createStructCollection(String collectionName, int dim) { |
| 2147 | + if (collectionName == null || collectionName.isEmpty()) { |
| 2148 | + collectionName = "StructCollection_" + GenerateUtil.getRandomString(10); |
| 2149 | + } |
| 2150 | + |
| 2151 | + CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder() |
| 2152 | + .build(); |
| 2153 | + |
| 2154 | + // Primary key field |
| 2155 | + collectionSchema.addField(AddFieldReq.builder() |
| 2156 | + .fieldName(CommonData.fieldInt64) |
| 2157 | + .dataType(DataType.Int64) |
| 2158 | + .isPrimaryKey(true) |
| 2159 | + .autoID(false) |
| 2160 | + .build()); |
| 2161 | + |
| 2162 | + // Regular float vector field |
| 2163 | + collectionSchema.addField(AddFieldReq.builder() |
| 2164 | + .fieldName(CommonData.fieldFloatVector) |
| 2165 | + .dataType(DataType.FloatVector) |
| 2166 | + .dimension(dim) |
| 2167 | + .build()); |
| 2168 | + |
| 2169 | + // Struct array field with multiple sub-fields including vectors |
| 2170 | + collectionSchema.addField(AddFieldReq.builder() |
| 2171 | + .fieldName(CommonData.fieldStruct) |
| 2172 | + .description("struct array field with vectors") |
| 2173 | + .dataType(DataType.Array) |
| 2174 | + .elementType(DataType.Struct) |
| 2175 | + .maxCapacity(CommonData.structMaxCapacity) |
| 2176 | + .addStructField(AddFieldReq.builder() |
| 2177 | + .fieldName(CommonData.structFieldInt32) |
| 2178 | + .description("int32 field in struct") |
| 2179 | + .dataType(DataType.Int32) |
| 2180 | + .build()) |
| 2181 | + .addStructField(AddFieldReq.builder() |
| 2182 | + .fieldName(CommonData.structFieldVarchar) |
| 2183 | + .description("varchar field in struct") |
| 2184 | + .dataType(DataType.VarChar) |
| 2185 | + .maxLength(1024) |
| 2186 | + .build()) |
| 2187 | + .addStructField(AddFieldReq.builder() |
| 2188 | + .fieldName(CommonData.structFieldFloatVector1) |
| 2189 | + .description("first float vector in struct") |
| 2190 | + .dataType(DataType.FloatVector) |
| 2191 | + .dimension(dim) |
| 2192 | + .build()) |
| 2193 | + .addStructField(AddFieldReq.builder() |
| 2194 | + .fieldName(CommonData.structFieldFloatVector2) |
| 2195 | + .description("second float vector in struct") |
| 2196 | + .dataType(DataType.FloatVector) |
| 2197 | + .dimension(dim) |
| 2198 | + .build()) |
| 2199 | + .build()); |
| 2200 | + |
| 2201 | + CreateCollectionReq createCollectionReq = CreateCollectionReq.builder() |
| 2202 | + .collectionName(collectionName) |
| 2203 | + .collectionSchema(collectionSchema) |
| 2204 | + .enableDynamicField(false) |
| 2205 | + .numShards(1) |
| 2206 | + .build(); |
| 2207 | + |
| 2208 | + milvusClientV2.createCollection(createCollectionReq); |
| 2209 | + log.info("Created struct collection: " + collectionName); |
| 2210 | + return collectionName; |
| 2211 | + } |
| 2212 | + |
| 2213 | + /** |
| 2214 | + * Generate data for struct collection |
| 2215 | + * |
| 2216 | + * @param startId start id |
| 2217 | + * @param count number of rows |
| 2218 | + * @param dim vector dimension |
| 2219 | + * @return list of JsonObject data |
| 2220 | + */ |
| 2221 | + public static List<JsonObject> generateStructData(long startId, long count, int dim) { |
| 2222 | + List<JsonObject> dataList = new ArrayList<>(); |
| 2223 | + Random random = new Random(); |
| 2224 | + |
| 2225 | + for (long i = startId; i < startId + count; i++) { |
| 2226 | + JsonObject row = new JsonObject(); |
| 2227 | + row.addProperty(CommonData.fieldInt64, i); |
| 2228 | + |
| 2229 | + // Regular float vector |
| 2230 | + List<Float> vector = GenerateUtil.generateFloatVector(1, 6, dim).get(0); |
| 2231 | + row.add(CommonData.fieldFloatVector, new com.google.gson.Gson().toJsonTree(vector)); |
| 2232 | + |
| 2233 | + // Struct array - each row has 3-10 struct elements |
| 2234 | + int structCount = random.nextInt(8) + 3; |
| 2235 | + JsonArray structArray = new JsonArray(); |
| 2236 | + for (int j = 0; j < structCount; j++) { |
| 2237 | + JsonObject structElement = new JsonObject(); |
| 2238 | + structElement.addProperty(CommonData.structFieldInt32, random.nextInt(10000)); |
| 2239 | + structElement.addProperty(CommonData.structFieldVarchar, "struct_desc_" + i + "_" + j); |
| 2240 | + |
| 2241 | + // First vector in struct |
| 2242 | + List<Float> vec1 = GenerateUtil.generateFloatVector(1, 6, dim).get(0); |
| 2243 | + structElement.add(CommonData.structFieldFloatVector1, new com.google.gson.Gson().toJsonTree(vec1)); |
| 2244 | + |
| 2245 | + // Second vector in struct |
| 2246 | + List<Float> vec2 = GenerateUtil.generateFloatVector(1, 6, dim).get(0); |
| 2247 | + structElement.add(CommonData.structFieldFloatVector2, new com.google.gson.Gson().toJsonTree(vec2)); |
| 2248 | + |
| 2249 | + structArray.add(structElement); |
| 2250 | + } |
| 2251 | + row.add(CommonData.fieldStruct, structArray); |
| 2252 | + |
| 2253 | + dataList.add(row); |
| 2254 | + } |
| 2255 | + return dataList; |
| 2256 | + } |
| 2257 | + |
| 2258 | + /** |
| 2259 | + * Create embedding list index for struct vector field |
| 2260 | + * |
| 2261 | + * @param collectionName collection name |
| 2262 | + * @param structFieldName struct field name |
| 2263 | + * @param vectorFieldName vector field name in struct |
| 2264 | + * @param indexName index name |
| 2265 | + * @param metricType metric type (MAX_SIM_COSINE, MAX_SIM_IP, MAX_SIM_L2) |
| 2266 | + */ |
| 2267 | + public static void createStructVectorIndex(String collectionName, String structFieldName, |
| 2268 | + String vectorFieldName, String indexName, |
| 2269 | + IndexParam.MetricType metricType) { |
| 2270 | + String fullFieldName = String.format("%s[%s]", structFieldName, vectorFieldName); |
| 2271 | + IndexParam indexParam = IndexParam.builder() |
| 2272 | + .fieldName(fullFieldName) |
| 2273 | + .indexName(indexName) |
| 2274 | + .indexType(IndexParam.IndexType.HNSW) |
| 2275 | + .metricType(metricType) |
| 2276 | + .extraParams(new HashMap<String, Object>() {{ |
| 2277 | + put("M", 16); |
| 2278 | + put("efConstruction", 200); |
| 2279 | + }}) |
| 2280 | + .build(); |
| 2281 | + |
| 2282 | + milvusClientV2.createIndex(CreateIndexReq.builder() |
| 2283 | + .collectionName(collectionName) |
| 2284 | + .indexParams(Collections.singletonList(indexParam)) |
| 2285 | + .build()); |
| 2286 | + log.info("Created struct vector index: " + indexName + " on " + fullFieldName); |
| 2287 | + } |
| 2288 | + |
| 2289 | + /** |
| 2290 | + * Generate EmbeddingList from struct query result |
| 2291 | + * |
| 2292 | + * @param structData struct field data from query result |
| 2293 | + * @param vectorFieldName vector field name in struct |
| 2294 | + * @return EmbeddingList |
| 2295 | + */ |
| 2296 | + public static EmbeddingList generateEmbeddingListFromStruct(List<Map<String, Object>> structData, |
| 2297 | + String vectorFieldName) { |
| 2298 | + EmbeddingList embeddingList = new EmbeddingList(); |
| 2299 | + for (Map<String, Object> struct : structData) { |
| 2300 | + @SuppressWarnings("unchecked") |
| 2301 | + List<Float> vector = (List<Float>) struct.get(vectorFieldName); |
| 2302 | + embeddingList.add(new FloatVec(vector)); |
| 2303 | + } |
| 2304 | + return embeddingList; |
| 2305 | + } |
| 2306 | + |
| 2307 | + /** |
| 2308 | + * Generate random EmbeddingList for search |
| 2309 | + * |
| 2310 | + * @param vectorCount number of vectors in embedding list |
| 2311 | + * @param dim vector dimension |
| 2312 | + * @return EmbeddingList |
| 2313 | + */ |
| 2314 | + public static EmbeddingList generateRandomEmbeddingList(int vectorCount, int dim) { |
| 2315 | + EmbeddingList embeddingList = new EmbeddingList(); |
| 2316 | + for (int i = 0; i < vectorCount; i++) { |
| 2317 | + List<Float> vector = GenerateUtil.generateFloatVector(1, 6, dim).get(0); |
| 2318 | + embeddingList.add(new FloatVec(vector)); |
| 2319 | + } |
| 2320 | + return embeddingList; |
| 2321 | + } |
| 2322 | + |
1978 | 2323 | } |
1979 | 2324 |
|
1980 | 2325 |
|
0 commit comments