Skip to content

Commit a38d4cd

Browse files
authored
New metric type for struct vectors (#1642)
Signed-off-by: yhmo <yihua.mo@zilliz.com>
1 parent 1f069f5 commit a38d4cd

5 files changed

Lines changed: 57 additions & 30 deletions

File tree

docker-compose.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ version: '3.5'
33
services:
44
standalone:
55
container_name: milvus-javasdk-standalone-1
6-
image: milvusdb/milvus:master-20250929-ca1cc7c9-amd64
6+
image: milvusdb/milvus:2.6-20251015-bb4446e5-amd64
77
command: [ "milvus", "run", "standalone" ]
88
environment:
99
- COMMON_STORAGETYPE=local
@@ -24,7 +24,7 @@ services:
2424

2525
standaloneslave:
2626
container_name: milvus-javasdk-standalone-2
27-
image: milvusdb/milvus:master-20250929-ca1cc7c9-amd64
27+
image: milvusdb/milvus:2.6-20251015-bb4446e5-amd64
2828
command: [ "milvus", "run", "standalone" ]
2929
environment:
3030
- COMMON_STORAGETYPE=local

sdk-core/src/main/java/io/milvus/v2/common/IndexParam.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,14 @@ public enum MetricType {
5353
// Only for sparse vector with BM25
5454
BM25,
5555

56-
// Only for struct vector
57-
MAX_SIM,
56+
// Only for float vector inside struct
57+
MAX_SIM, // equal to MAX_SIM_COSINE
58+
MAX_SIM_COSINE,
59+
MAX_SIM_IP,
60+
MAX_SIM_L2,
61+
// Only for binary vector inside struct
62+
MAX_SIM_JACCARD,
63+
MAX_SIM_HAMMING,
5864
;
5965
}
6066

@@ -105,9 +111,6 @@ public enum IndexType {
105111
// From Milvus 2.5.4 onward, SPARSE_WAND is being deprecated. Instead, it is recommended to
106112
// use "inverted_index_algo": "DAAT_WAND" for equivalency while maintaining compatibility.
107113
SPARSE_WAND(301),
108-
109-
// Only for struct vector
110-
EMB_LIST_HNSW(401),
111114
;
112115

113116
private final String name;

sdk-core/src/main/java/io/milvus/v2/utils/DataUtils.java

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -371,10 +371,19 @@ private static FieldData genStructSubFieldData(CreateCollectionReq.FieldSchema f
371371
@SuppressWarnings("unchecked")
372372
public static VectorArray genVectorArray(DataType dataType, List<?> objects) {
373373
VectorArray.Builder builder = VectorArray.newBuilder().setElementType(dataType);
374-
if (dataType == DataType.FloatVector) {
375-
// each object is List<List<Float>>
376-
for (Object object : objects) {
377-
if (object instanceof List) {
374+
switch (dataType) {
375+
case FloatVector:
376+
case BinaryVector:
377+
case Float16Vector:
378+
case BFloat16Vector:
379+
case Int8Vector: {
380+
// for FloatVector, objects is List<List<List<Float>>>
381+
// for others, objects is List<List<ByteBuffer>> (each vector is a single ByteBuffer)
382+
for (Object object : objects) {
383+
if (!(object instanceof List)) {
384+
throw new MilvusClientException(ErrorCode.INVALID_PARAMS, "Input value is not List<> for type: " + dataType.name());
385+
}
386+
378387
List<?> listOfList = (List<?>) object;
379388
if (listOfList.isEmpty()) {
380389
// struct field value is empty, fill the VectorArray with zero-dim vectors?
@@ -394,16 +403,14 @@ public static VectorArray genVectorArray(DataType dataType, List<?> objects) {
394403
throw new MilvusClientException(ErrorCode.INVALID_PARAMS, msg);
395404
}
396405
builder.addData(vf);
397-
} else {
398-
throw new MilvusClientException(ErrorCode.INVALID_PARAMS, "The type of FloatVector must be List<>");
399406
}
407+
return builder.build();
400408
}
401-
402-
return builder.build();
409+
default:
410+
// so far, struct field only supports FloatVector/BinaryVector/Float16Vector/BFloat16Vector/Int8Vector
411+
String msg = String.format("Illegal vector dataType %s for struct field", dataType.name());
412+
throw new MilvusClientException(ErrorCode.INVALID_PARAMS, msg);
403413
}
404-
// so far, struct field only supports FloatVector
405-
String msg = String.format("Illegal vector dataType %s for struct field", dataType.name());
406-
throw new MilvusClientException(ErrorCode.INVALID_PARAMS, msg);
407414
}
408415

409416
public DeleteRequest ConvertToGrpcDeleteRequest(DeleteReq request) {

sdk-core/src/test/java/io/milvus/TestUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public class TestUtils {
1111
private int dimension = 256;
1212
private static final Random RANDOM = new Random();
1313

14-
public static final String MilvusDockerImageID = "milvusdb/milvus:master-20250929-ca1cc7c9-amd64";
14+
public static final String MilvusDockerImageID = "milvusdb/milvus:2.6-20251015-bb4446e5-amd64";
1515

1616
public TestUtils(int dimension) {
1717
this.dimension = dimension;

sdk-core/src/test/java/io/milvus/v2/client/MilvusClientV2DockerTest.java

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,7 @@ void testStruct() {
10281028
String structField = "clips";
10291029
String structScalarField = "desc";
10301030
String structVectorField = "clip";
1031+
String structBinVectorField = "clip_bin";
10311032
int structCapacity = 300;
10321033
int varcharLength = 100;
10331034
CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
@@ -1065,6 +1066,12 @@ void testStruct() {
10651066
.dataType(DataType.FloatVector)
10661067
.dimension(DIMENSION)
10671068
.build())
1069+
// .addStructField(AddFieldReq.builder()
1070+
// .fieldName(structBinVectorField)
1071+
// .description("dummy")
1072+
// .dataType(DataType.BinaryVector)
1073+
// .dimension(DIMENSION)
1074+
// .build())
10681075
.build());
10691076

10701077
client.dropCollection(DropCollectionReq.builder()
@@ -1084,10 +1091,17 @@ void testStruct() {
10841091
.metricType(IndexParam.MetricType.COSINE)
10851092
.build());
10861093
indexParams.add(IndexParam.builder()
1087-
.fieldName(structVectorField)
1088-
.indexType(IndexParam.IndexType.EMB_LIST_HNSW)
1089-
.metricType(IndexParam.MetricType.MAX_SIM)
1090-
.build());
1094+
.fieldName("clips[clip]")
1095+
.indexName("index1")
1096+
.indexType(IndexParam.IndexType.HNSW)
1097+
.metricType(IndexParam.MetricType.MAX_SIM_L2)
1098+
.build());
1099+
// indexParams.add(IndexParam.builder()
1100+
// .fieldName("clips[clip_bin]")
1101+
// .indexName("index2")
1102+
// .indexType(IndexParam.IndexType.AUTOINDEX)
1103+
// .metricType(IndexParam.MetricType.MAX_SIM_HAMMING)
1104+
// .build());
10911105
client.createIndex(CreateIndexReq.builder()
10921106
.collectionName(randomCollectionName)
10931107
.indexParams(indexParams)
@@ -1125,11 +1139,12 @@ void testStruct() {
11251139
DescribeIndexResp indexDesc = client.describeIndex(DescribeIndexReq.builder()
11261140
.collectionName(randomCollectionName)
11271141
.fieldName(structVectorField)
1142+
.indexName("index1")
11281143
.build());
11291144
Assertions.assertEquals(1, indexDesc.getIndexDescriptions().size());
11301145
DescribeIndexResp.IndexDesc desc = indexDesc.getIndexDescriptions().get(0);
1131-
Assertions.assertEquals(IndexParam.IndexType.EMB_LIST_HNSW, desc.getIndexType());
1132-
Assertions.assertEquals(IndexParam.MetricType.MAX_SIM, desc.getMetricType());
1146+
Assertions.assertEquals(IndexParam.IndexType.HNSW, desc.getIndexType());
1147+
Assertions.assertEquals(IndexParam.MetricType.MAX_SIM_L2, desc.getMetricType());
11331148

11341149
// insert
11351150
List<JsonObject> rows = new ArrayList<>();
@@ -1144,6 +1159,7 @@ void testStruct() {
11441159
JsonObject struct = new JsonObject();
11451160
struct.addProperty(structScalarField, "No." + k);
11461161
struct.add(structVectorField, JsonUtils.toJsonTree(utils.generateFloatVector()));
1162+
// struct.add(structBinVectorField, JsonUtils.toJsonTree(utils.generateBinaryVector(DIMENSION).array()));
11471163
structArr.add(struct);
11481164
}
11491165
row.add(structField, structArr);
@@ -1166,6 +1182,7 @@ void testStruct() {
11661182
JsonObject struct = new JsonObject();
11671183
struct.addProperty(structScalarField, "updated_No." + k);
11681184
struct.add(structVectorField, JsonUtils.toJsonTree(utils.generateFloatVector()));
1185+
// struct.add(structBinVectorField, JsonUtils.toJsonTree(utils.generateBinaryVector(DIMENSION).array()));
11691186
structArr.add(struct);
11701187
}
11711188
row.add(structField, structArr);
@@ -1190,25 +1207,25 @@ void testStruct() {
11901207
Assertions.assertTrue(queryResults.get(1).getEntity().containsKey(structField));
11911208

11921209
// search
1193-
List<Map<String, Object>> structs0 = (List<Map<String, Object>>)queryResults.get(0).getEntity().get(structField);
11941210
EmbeddingList embList0 = new EmbeddingList();
1211+
EmbeddingList embList1 = new EmbeddingList();
1212+
1213+
List<Map<String, Object>> structs0 = (List<Map<String, Object>>)queryResults.get(0).getEntity().get(structField);
11951214
for (Map<String, Object> struct : structs0) {
11961215
embList0.add(new FloatVec((List<Float>)struct.get(structVectorField)));
11971216
}
1198-
11991217
List<Map<String, Object>> structs1 = (List<Map<String, Object>>)queryResults.get(1).getEntity().get(structField);
1200-
EmbeddingList embList1 = new EmbeddingList();
12011218
for (Map<String, Object> struct : structs1) {
12021219
embList1.add(new FloatVec((List<Float>)struct.get(structVectorField)));
12031220
}
12041221

12051222
int topK = 5;
12061223
SearchResp searchResp = client.search(SearchReq.builder()
12071224
.collectionName(randomCollectionName)
1208-
.annsField(structVectorField)
1225+
.annsField("clips[clip]")
12091226
.data(Arrays.asList(embList0, embList1))
12101227
.limit(topK)
1211-
.outputFields(Collections.singletonList(structScalarField))
1228+
.outputFields(Collections.singletonList("clips[desc]"))
12121229
.build());
12131230
List<List<SearchResp.SearchResult>> searchResults = searchResp.getSearchResults();
12141231
Assertions.assertEquals(2, searchResults.size());

0 commit comments

Comments
 (0)