Skip to content

Commit 93065fa

Browse files
authored
add test cases for search by id and struct array. (#1749) (#1753)
Signed-off-by: yongpengli-z <yongpeng.li@zilliz.com>
1 parent 8fe6872 commit 93065fa

File tree

5 files changed

+2581
-33
lines changed

5 files changed

+2581
-33
lines changed

tests/milvustestv2/src/main/java/com/zilliz/milvustestv2/common/CommonData.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ public class CommonData {
4040
public static String fieldSparseVector = "fieldSparseVector";
4141
public static int addMaxLength = 99;
4242

43+
// Struct related fields
// Names and sizes shared by the struct-array test helpers.
44+
// Default name for the struct collection (NOTE(review): no usage visible in this view - confirm).
public static String defaultStructCollection = "StructCollection";
45+
// Name of the Array-of-Struct field that CommonFunction.createStructCollection adds to the schema.
public static String fieldStruct = "fieldStruct";
46+
// Name of the Int32 sub-field inside each struct element.
public static String structFieldInt32 = "structInt32";
47+
// Name of the VarChar sub-field inside each struct element.
public static String structFieldVarchar = "structVarchar";
48+
// Name of the first FloatVector sub-field inside each struct element.
public static String structFieldFloatVector1 = "structFloatVector1";
49+
// Name of the second FloatVector sub-field inside each struct element.
public static String structFieldFloatVector2 = "structFloatVector2";
50+
// Presumably the default dimension for the struct vector sub-fields - TODO confirm against callers.
public static int structVectorDim = 128;
51+
// Passed as maxCapacity of the struct array field in CommonFunction.createStructCollection.
public static int structMaxCapacity = 100;
52+
4353

4454
public static String partitionName = "partitionName";
4555
// Default vector field used when a collection is created via quick setup

tests/milvustestv2/src/main/java/com/zilliz/milvustestv2/common/CommonFunction.java

Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import io.milvus.v2.common.ConsistencyLevel;
2222
import io.milvus.v2.common.DataType;
2323
import io.milvus.v2.common.IndexParam;
24+
import io.milvus.v2.service.collection.request.AddFieldReq;
2425
import io.milvus.v2.service.collection.request.CreateCollectionReq;
2526
import io.milvus.v2.service.collection.request.DescribeCollectionReq;
2627
import io.milvus.v2.service.collection.request.LoadCollectionReq;
@@ -275,6 +276,110 @@ public static String createNewCollection(int dim, String collectionName, DataTyp
275276
log.info("create collection:" + collectionName);
276277
return collectionName;
277278
}
279+
280+
/**
281+
* Create a new collection with Varchar primary key
282+
*
283+
* @param dim dimension of the vector field
284+
* @param collectionName collection name
285+
* @param vectorType vector data type
286+
* @return collection name
287+
*/
288+
public static String createNewCollectionWithVarcharPK(int dim, String collectionName, DataType vectorType) {
289+
if (collectionName == null || collectionName.equals("")) {
290+
collectionName = "Collection_" + GenerateUtil.getRandomString(10);
291+
}
292+
// Use Varchar as primary key
293+
CreateCollectionReq.FieldSchema fieldVarcharPK = CreateCollectionReq.FieldSchema.builder()
294+
.autoID(false)
295+
.dataType(DataType.VarChar)
296+
.isPrimaryKey(true)
297+
.name(CommonData.fieldVarchar)
298+
.maxLength(100)
299+
.build();
300+
CreateCollectionReq.FieldSchema fieldInt64 = CreateCollectionReq.FieldSchema.builder()
301+
.dataType(io.milvus.v2.common.DataType.Int64)
302+
.isPrimaryKey(false)
303+
.name(CommonData.fieldInt64)
304+
.build();
305+
CreateCollectionReq.FieldSchema fieldInt32 = CreateCollectionReq.FieldSchema.builder()
306+
.dataType(DataType.Int32)
307+
.name(CommonData.fieldInt32)
308+
.isPrimaryKey(false)
309+
.build();
310+
CreateCollectionReq.FieldSchema fieldInt8 = CreateCollectionReq.FieldSchema.builder()
311+
.dataType(DataType.Int8)
312+
.name(CommonData.fieldInt8)
313+
.isPrimaryKey(false)
314+
.build();
315+
CreateCollectionReq.FieldSchema fieldFloat = CreateCollectionReq.FieldSchema.builder()
316+
.dataType(DataType.Float)
317+
.name(CommonData.fieldFloat)
318+
.isPrimaryKey(false)
319+
.build();
320+
CreateCollectionReq.FieldSchema fieldDouble = CreateCollectionReq.FieldSchema.builder()
321+
.dataType(DataType.Double)
322+
.name(CommonData.fieldDouble)
323+
.isPrimaryKey(false)
324+
.build();
325+
CreateCollectionReq.FieldSchema fieldBool = CreateCollectionReq.FieldSchema.builder()
326+
.dataType(DataType.Bool)
327+
.name(CommonData.fieldBool)
328+
.isPrimaryKey(false)
329+
.build();
330+
CreateCollectionReq.FieldSchema fieldJson = CreateCollectionReq.FieldSchema.builder()
331+
.dataType(DataType.JSON)
332+
.name(CommonData.fieldJson)
333+
.isPrimaryKey(false)
334+
.build();
335+
CreateCollectionReq.FieldSchema fieldVector = CreateCollectionReq.FieldSchema.builder()
336+
.dataType(vectorType)
337+
.isPrimaryKey(false)
338+
.build();
339+
if (vectorType == DataType.FloatVector) {
340+
fieldVector.setDimension(dim);
341+
fieldVector.setName(CommonData.fieldFloatVector);
342+
}
343+
if (vectorType == DataType.BinaryVector) {
344+
fieldVector.setDimension(dim);
345+
fieldVector.setName(CommonData.fieldBinaryVector);
346+
}
347+
if (vectorType == DataType.Float16Vector) {
348+
fieldVector.setDimension(dim);
349+
fieldVector.setName(CommonData.fieldFloat16Vector);
350+
}
351+
if (vectorType == DataType.BFloat16Vector) {
352+
fieldVector.setDimension(dim);
353+
fieldVector.setName(CommonData.fieldBF16Vector);
354+
}
355+
if (vectorType == DataType.SparseFloatVector) {
356+
fieldVector.setName(CommonData.fieldSparseVector);
357+
}
358+
List<CreateCollectionReq.FieldSchema> fieldSchemaList = new ArrayList<>();
359+
fieldSchemaList.add(fieldVarcharPK);
360+
fieldSchemaList.add(fieldInt64);
361+
fieldSchemaList.add(fieldInt32);
362+
fieldSchemaList.add(fieldInt8);
363+
fieldSchemaList.add(fieldFloat);
364+
fieldSchemaList.add(fieldDouble);
365+
fieldSchemaList.add(fieldBool);
366+
fieldSchemaList.add(fieldJson);
367+
fieldSchemaList.add(fieldVector);
368+
CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
369+
.fieldSchemaList(fieldSchemaList)
370+
.build();
371+
CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
372+
.collectionSchema(collectionSchema)
373+
.collectionName(collectionName)
374+
.enableDynamicField(false)
375+
.description("collection with varchar primary key")
376+
.numShards(1)
377+
.build();
378+
milvusClientV2.createCollection(createCollectionReq);
379+
log.info("create collection with varchar pk:" + collectionName);
380+
return collectionName;
381+
}
382+
278383
public static String createNewCollectionWithDatabase(int dim, String collectionName, DataType vectorType,String databaseName) {
279384
if (collectionName == null || collectionName.equals("")) {
280385
collectionName = "Collection_" + GenerateUtil.getRandomString(10);
@@ -929,6 +1034,60 @@ public static List<JsonObject> generateDefaultData(long startId, long num, int d
9291034
return jsonList;
9301035
}
9311036

1037+
/**
1038+
* Generate data with varchar primary key for collection created by createNewCollectionWithVarcharPK
1039+
*
1040+
* @param startId start id
1041+
* @param num number of entities to generate
1042+
* @param dim dimension of vector
1043+
* @param vectorType vector data type
1044+
* @return List of JsonObject representing the data rows
1045+
*/
1046+
public static List<JsonObject> generateDataWithVarcharPK(long startId, long num, int dim, DataType vectorType) {
1047+
List<JsonObject> jsonList = new ArrayList<>();
1048+
Random ran = new Random();
1049+
Gson gson = new Gson();
1050+
for (long i = startId; i < (num + startId); i++) {
1051+
JsonObject row = new JsonObject();
1052+
// Use varchar as primary key
1053+
row.addProperty(CommonData.fieldVarchar, "Str" + i);
1054+
row.addProperty(CommonData.fieldInt64, i);
1055+
row.addProperty(CommonData.fieldInt32, (int) i % 32767);
1056+
row.addProperty(CommonData.fieldInt8, (short) i % 127);
1057+
row.addProperty(CommonData.fieldDouble, (double) i);
1058+
row.addProperty(CommonData.fieldBool, i % 2 == 0);
1059+
row.addProperty(CommonData.fieldFloat, (float) i);
1060+
// Generate vector based on type
1061+
if (vectorType == DataType.FloatVector) {
1062+
List<Float> vector = new ArrayList<>();
1063+
for (int k = 0; k < dim; ++k) {
1064+
vector.add(ran.nextFloat());
1065+
}
1066+
row.add(CommonData.fieldFloatVector, gson.toJsonTree(vector));
1067+
}
1068+
if (vectorType == DataType.BinaryVector) {
1069+
row.add(CommonData.fieldBinaryVector, gson.toJsonTree(generateBinaryVector(dim).array()));
1070+
}
1071+
if (vectorType == DataType.Float16Vector) {
1072+
row.add(CommonData.fieldFloat16Vector, gson.toJsonTree(generateFloat16Vector(dim).array()));
1073+
}
1074+
if (vectorType == DataType.BFloat16Vector) {
1075+
row.add(CommonData.fieldBF16Vector, gson.toJsonTree(generateBF16Vector(dim).array()));
1076+
}
1077+
if (vectorType == DataType.SparseFloatVector) {
1078+
row.add(CommonData.fieldSparseVector, gson.toJsonTree(generateSparseVector(dim)));
1079+
}
1080+
JsonObject json = new JsonObject();
1081+
json.addProperty(CommonData.fieldInt64, (int) i % 32767);
1082+
json.addProperty(CommonData.fieldInt32, (int) i % 32767);
1083+
json.addProperty(CommonData.fieldDouble, (double) i);
1084+
json.addProperty(CommonData.fieldFloat, (float) i);
1085+
row.add(CommonData.fieldJson, json);
1086+
jsonList.add(row);
1087+
}
1088+
return jsonList;
1089+
}
1090+
9321091
public static List<JsonObject> generateDefaultDataWithDynamic(long startId, long num, int dim, DataType vectorType) {
9331092
List<JsonObject> jsonList = new ArrayList<>();
9341093
Random ran = new Random();
@@ -1975,6 +2134,192 @@ public static void multiFilesUpload(String path, List<List<String>> batchFiles)
19752134

19762135
}
19772136

2137+
// ==================== Struct Array Related Methods ====================
2138+
2139+
/**
2140+
* Create a collection schema with Struct field containing vectors
2141+
*
2142+
* @param collectionName collection name
2143+
* @param dim vector dimension
2144+
* @return collection name
2145+
*/
2146+
public static String createStructCollection(String collectionName, int dim) {
2147+
if (collectionName == null || collectionName.isEmpty()) {
2148+
collectionName = "StructCollection_" + GenerateUtil.getRandomString(10);
2149+
}
2150+
2151+
CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
2152+
.build();
2153+
2154+
// Primary key field
2155+
collectionSchema.addField(AddFieldReq.builder()
2156+
.fieldName(CommonData.fieldInt64)
2157+
.dataType(DataType.Int64)
2158+
.isPrimaryKey(true)
2159+
.autoID(false)
2160+
.build());
2161+
2162+
// Regular float vector field
2163+
collectionSchema.addField(AddFieldReq.builder()
2164+
.fieldName(CommonData.fieldFloatVector)
2165+
.dataType(DataType.FloatVector)
2166+
.dimension(dim)
2167+
.build());
2168+
2169+
// Struct array field with multiple sub-fields including vectors
2170+
collectionSchema.addField(AddFieldReq.builder()
2171+
.fieldName(CommonData.fieldStruct)
2172+
.description("struct array field with vectors")
2173+
.dataType(DataType.Array)
2174+
.elementType(DataType.Struct)
2175+
.maxCapacity(CommonData.structMaxCapacity)
2176+
.addStructField(AddFieldReq.builder()
2177+
.fieldName(CommonData.structFieldInt32)
2178+
.description("int32 field in struct")
2179+
.dataType(DataType.Int32)
2180+
.build())
2181+
.addStructField(AddFieldReq.builder()
2182+
.fieldName(CommonData.structFieldVarchar)
2183+
.description("varchar field in struct")
2184+
.dataType(DataType.VarChar)
2185+
.maxLength(1024)
2186+
.build())
2187+
.addStructField(AddFieldReq.builder()
2188+
.fieldName(CommonData.structFieldFloatVector1)
2189+
.description("first float vector in struct")
2190+
.dataType(DataType.FloatVector)
2191+
.dimension(dim)
2192+
.build())
2193+
.addStructField(AddFieldReq.builder()
2194+
.fieldName(CommonData.structFieldFloatVector2)
2195+
.description("second float vector in struct")
2196+
.dataType(DataType.FloatVector)
2197+
.dimension(dim)
2198+
.build())
2199+
.build());
2200+
2201+
CreateCollectionReq createCollectionReq = CreateCollectionReq.builder()
2202+
.collectionName(collectionName)
2203+
.collectionSchema(collectionSchema)
2204+
.enableDynamicField(false)
2205+
.numShards(1)
2206+
.build();
2207+
2208+
milvusClientV2.createCollection(createCollectionReq);
2209+
log.info("Created struct collection: " + collectionName);
2210+
return collectionName;
2211+
}
2212+
2213+
/**
2214+
* Generate data for struct collection
2215+
*
2216+
* @param startId start id
2217+
* @param count number of rows
2218+
* @param dim vector dimension
2219+
* @return list of JsonObject data
2220+
*/
2221+
public static List<JsonObject> generateStructData(long startId, long count, int dim) {
2222+
List<JsonObject> dataList = new ArrayList<>();
2223+
Random random = new Random();
2224+
2225+
for (long i = startId; i < startId + count; i++) {
2226+
JsonObject row = new JsonObject();
2227+
row.addProperty(CommonData.fieldInt64, i);
2228+
2229+
// Regular float vector
2230+
List<Float> vector = GenerateUtil.generateFloatVector(1, 6, dim).get(0);
2231+
row.add(CommonData.fieldFloatVector, new com.google.gson.Gson().toJsonTree(vector));
2232+
2233+
// Struct array - each row has 3-10 struct elements
2234+
int structCount = random.nextInt(8) + 3;
2235+
JsonArray structArray = new JsonArray();
2236+
for (int j = 0; j < structCount; j++) {
2237+
JsonObject structElement = new JsonObject();
2238+
structElement.addProperty(CommonData.structFieldInt32, random.nextInt(10000));
2239+
structElement.addProperty(CommonData.structFieldVarchar, "struct_desc_" + i + "_" + j);
2240+
2241+
// First vector in struct
2242+
List<Float> vec1 = GenerateUtil.generateFloatVector(1, 6, dim).get(0);
2243+
structElement.add(CommonData.structFieldFloatVector1, new com.google.gson.Gson().toJsonTree(vec1));
2244+
2245+
// Second vector in struct
2246+
List<Float> vec2 = GenerateUtil.generateFloatVector(1, 6, dim).get(0);
2247+
structElement.add(CommonData.structFieldFloatVector2, new com.google.gson.Gson().toJsonTree(vec2));
2248+
2249+
structArray.add(structElement);
2250+
}
2251+
row.add(CommonData.fieldStruct, structArray);
2252+
2253+
dataList.add(row);
2254+
}
2255+
return dataList;
2256+
}
2257+
2258+
/**
2259+
* Create embedding list index for struct vector field
2260+
*
2261+
* @param collectionName collection name
2262+
* @param structFieldName struct field name
2263+
* @param vectorFieldName vector field name in struct
2264+
* @param indexName index name
2265+
* @param metricType metric type (MAX_SIM_COSINE, MAX_SIM_IP, MAX_SIM_L2)
2266+
*/
2267+
public static void createStructVectorIndex(String collectionName, String structFieldName,
2268+
String vectorFieldName, String indexName,
2269+
IndexParam.MetricType metricType) {
2270+
String fullFieldName = String.format("%s[%s]", structFieldName, vectorFieldName);
2271+
IndexParam indexParam = IndexParam.builder()
2272+
.fieldName(fullFieldName)
2273+
.indexName(indexName)
2274+
.indexType(IndexParam.IndexType.HNSW)
2275+
.metricType(metricType)
2276+
.extraParams(new HashMap<String, Object>() {{
2277+
put("M", 16);
2278+
put("efConstruction", 200);
2279+
}})
2280+
.build();
2281+
2282+
milvusClientV2.createIndex(CreateIndexReq.builder()
2283+
.collectionName(collectionName)
2284+
.indexParams(Collections.singletonList(indexParam))
2285+
.build());
2286+
log.info("Created struct vector index: " + indexName + " on " + fullFieldName);
2287+
}
2288+
2289+
/**
2290+
* Generate EmbeddingList from struct query result
2291+
*
2292+
* @param structData struct field data from query result
2293+
* @param vectorFieldName vector field name in struct
2294+
* @return EmbeddingList
2295+
*/
2296+
public static EmbeddingList generateEmbeddingListFromStruct(List<Map<String, Object>> structData,
2297+
String vectorFieldName) {
2298+
EmbeddingList embeddingList = new EmbeddingList();
2299+
for (Map<String, Object> struct : structData) {
2300+
@SuppressWarnings("unchecked")
2301+
List<Float> vector = (List<Float>) struct.get(vectorFieldName);
2302+
embeddingList.add(new FloatVec(vector));
2303+
}
2304+
return embeddingList;
2305+
}
2306+
2307+
/**
2308+
* Generate random EmbeddingList for search
2309+
*
2310+
* @param vectorCount number of vectors in embedding list
2311+
* @param dim vector dimension
2312+
* @return EmbeddingList
2313+
*/
2314+
public static EmbeddingList generateRandomEmbeddingList(int vectorCount, int dim) {
2315+
EmbeddingList embeddingList = new EmbeddingList();
2316+
for (int i = 0; i < vectorCount; i++) {
2317+
List<Float> vector = GenerateUtil.generateFloatVector(1, 6, dim).get(0);
2318+
embeddingList.add(new FloatVec(vector));
2319+
}
2320+
return embeddingList;
2321+
}
2322+
19782323
}
19792324

19802325

0 commit comments

Comments
 (0)