Skip to content

Commit 1f069f5

Browse files
authored
Support NGRAM and BoostRanker (#1641)
Signed-off-by: yhmo <yihua.mo@zilliz.com>
1 parent f91b039 commit 1f069f5

6 files changed

Lines changed: 424 additions & 18 deletions

File tree

examples/src/main/java/io/milvus/v2/HybridSearchExample.java

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public class HybridSearchExample {
7373
private static final String SPARSE_VECTOR_FIELD = "sparse_vector";
7474
private static final IndexParam.MetricType SPARSE_VECTOR_METRIC = IndexParam.MetricType.IP;
7575

76-
private void createCollection() {
76+
private static void createCollection() {
7777
client.dropCollection(DropCollectionReq.builder()
7878
.collectionName(COLLECTION_NAME)
7979
.build());
@@ -147,7 +147,7 @@ private void createCollection() {
147147
System.out.println("Collection created");
148148
}
149149

150-
private void insertData() {
150+
private static void insertData() {
151151
long idCount = 0;
152152
int rowCount = 10000;
153153
// Insert entities by rows
@@ -167,19 +167,20 @@ private void insertData() {
167167
.collectionName(COLLECTION_NAME)
168168
.data(rows)
169169
.build());
170-
171-
System.out.printf("%d entities inserted by rows\n", rowCount);
170+
printRowCount();
172171
}
173172

174-
private void hybridSearch() {
173+
private static void printRowCount() {
175174
// Get row count, set ConsistencyLevel.STRONG to sync the data to query node so that data is visible
176175
QueryResp countR = client.query(QueryReq.builder()
177176
.collectionName(COLLECTION_NAME)
178177
.outputFields(Collections.singletonList("count(*)"))
179178
.consistencyLevel(ConsistencyLevel.STRONG)
180179
.build());
181180
System.out.printf("%d rows persisted\n", (long)countR.getQueryResults().get(0).getEntity().get("count(*)"));
181+
}
182182

183+
private static void hybridSearch() {
183184
// Search on multiple vector fields
184185
int NQ = 2;
185186
List<BaseVector> floatVectors = new ArrayList<>();
@@ -229,19 +230,18 @@ private void hybridSearch() {
229230
}
230231
}
231232

232-
private void dropCollection() {
233+
private static void dropCollection() {
233234
client.dropCollection(DropCollectionReq.builder()
234235
.collectionName(COLLECTION_NAME)
235236
.build());
236237
System.out.println("Collection dropped");
237238
}
238239

239240
public static void main(String[] args) {
240-
io.milvus.v2.HybridSearchExample example = new io.milvus.v2.HybridSearchExample();
241-
example.createCollection();
242-
example.insertData();
243-
example.hybridSearch();
244-
example.dropCollection();
241+
createCollection();
242+
insertData();
243+
hybridSearch();
244+
dropCollection();
245245

246246
client.close();
247247
}

examples/src/main/java/io/milvus/v2/JsonFieldExample.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,32 @@ public static void main(String[] args) {
9898
.metricType(IndexParam.MetricType.COSINE)
9999
.build());
100100

101+
// Create INVERTED index for a specific entry of JSON field
102+
// Index for JSON field is supported from milvus v2.5.7 and fully supported in v2.5.13+
103+
// Read the doc for more info: https://milvus.io/docs/json-indexing.md
104+
Map<String,Object> p1 = new HashMap<>();
105+
p1.put("json_path", "metadata[\"flags\"]");
106+
p1.put("json_cast_type", "array_double");
107+
indexes.add(IndexParam.builder()
108+
.fieldName(JSON_FIELD)
109+
.indexType(IndexParam.IndexType.INVERTED)
110+
.extraParams(p1)
111+
.build());
112+
113+
// Create NGRAM index for a specific entry of JSON field
114+
// NGRAM index for JSON field is supported from milvus v2.6.2
115+
// Read the doc for more info: https://milvus.io/docs/ngram.md
116+
Map<String,Object> p2 = new HashMap<>();
117+
p2.put("json_path","metadata[\"path\"]");
118+
p2.put("json_cast_type", "varchar");
119+
p2.put("min_gram", 3);
120+
p2.put("max_gram", 5);
121+
indexes.add(IndexParam.builder()
122+
.fieldName(JSON_FIELD)
123+
.indexType(IndexParam.IndexType.NGRAM)
124+
.extraParams(p2)
125+
.build());
126+
101127
CreateCollectionReq requestCreate = CreateCollectionReq.builder()
102128
.collectionName(COLLECTION_NAME)
103129
.collectionSchema(collectionSchema)
@@ -121,7 +147,7 @@ public static void main(String[] args) {
121147
// Note: for JSON field, always construct a real JsonObject
122148
// don't use row.addProperty(JSON_FIELD, strContent) since the value is treated as a string, not a JsonObject
123149
JsonObject metadata = new JsonObject();
124-
metadata.addProperty("path", String.format("\\root/abc/path_%d", i));
150+
metadata.addProperty("path", String.format("\\root/abc_%d/path_%d", i, i));
125151
metadata.addProperty("size", i);
126152
if (i%7 == 0) {
127153
metadata.addProperty("special", true);
@@ -197,6 +223,7 @@ public static void main(String[] args) {
197223
queryWithExpr(client, "JSON_CONTAINS(metadata[\"flags\"], 9)");
198224
queryWithExpr(client, "JSON_CONTAINS_ANY(metadata[\"flags\"], [8, 9, 10])");
199225
queryWithExpr(client, "JSON_CONTAINS_ALL(metadata[\"flags\"], [8, 9, 10])");
226+
queryWithExpr(client, "metadata[\"path\"] LIKE \"%c_5%\"");
200227
queryWithExpr(client, "dynamic1 < 2.0");
201228

202229
client.close();
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package io.milvus.v2;
21+
22+
import com.google.gson.JsonObject;
23+
import io.milvus.common.clientenum.FunctionType;
24+
import io.milvus.v2.client.ConnectConfig;
25+
import io.milvus.v2.client.MilvusClientV2;
26+
import io.milvus.v2.common.ConsistencyLevel;
27+
import io.milvus.v2.common.DataType;
28+
import io.milvus.v2.common.IndexParam;
29+
import io.milvus.v2.service.collection.request.AddFieldReq;
30+
import io.milvus.v2.service.collection.request.CreateCollectionReq;
31+
import io.milvus.v2.service.collection.request.DropCollectionReq;
32+
import io.milvus.v2.service.vector.request.FunctionScore;
33+
import io.milvus.v2.service.vector.request.InsertReq;
34+
import io.milvus.v2.service.vector.request.QueryReq;
35+
import io.milvus.v2.service.vector.request.SearchReq;
36+
import io.milvus.v2.service.vector.request.data.EmbeddedText;
37+
import io.milvus.v2.service.vector.request.ranker.BoostRanker;
38+
import io.milvus.v2.service.vector.request.ranker.DecayRanker;
39+
import io.milvus.v2.service.vector.response.QueryResp;
40+
import io.milvus.v2.service.vector.response.SearchResp;
41+
42+
import java.util.*;
43+
44+
public class RankerExample {
45+
private static final MilvusClientV2 client;
46+
47+
static {
48+
ConnectConfig config = ConnectConfig.builder()
49+
.uri("http://localhost:19530")
50+
.build();
51+
client = new MilvusClientV2(config);
52+
}
53+
54+
private static final String COLLECTION_NAME = "java_sdk_example_ranker_v2";
55+
private static final String NAME_FIELD = "name";
56+
private static final String BIRTH_YEAR_FIELD = "birth_year";
57+
private static final String LIFESPAN_FIELD = "lifespan";
58+
private static final String SPARSE_VECTOR_FIELD = "sparse_vector";
59+
60+
private static class Person {
61+
public String name;
62+
public int fromYear;
63+
public int toYear;
64+
public Person(String name, int from, int to) {
65+
this.name = name;
66+
this.fromYear = from;
67+
this.toYear = to;
68+
}
69+
}
70+
71+
private static List<Person> genData() {
72+
List<Person> persons = new ArrayList<>();
73+
persons.add(new Person("Isaac Newton", 1643, 1727));
74+
persons.add(new Person("Albert Einstein", 1879, 1955));
75+
persons.add(new Person("Marie Curie", 1867, 1934));
76+
persons.add(new Person("Charles Darwin", 1809, 1882));
77+
persons.add(new Person("Galileo Galilei", 1564, 1642));
78+
persons.add(new Person("Nikola Tesla", 1856, 1943));
79+
persons.add(new Person("James Clerk Maxwell", 1831, 1879));
80+
persons.add(new Person("Thomas Edison", 1847, 1931));
81+
persons.add(new Person("Alexander Fleming", 1881, 1955));
82+
persons.add(new Person("Louis Pasteur", 1822, 1895));
83+
persons.add(new Person("Werner Heisenberg", 1901, 1976));
84+
persons.add(new Person("Stephen Hawking", 1942, 2018));
85+
persons.add(new Person("Dmitri Mendeleev", 1834, 1907));
86+
persons.add(new Person("Max Planck", 1858, 1947));
87+
persons.add(new Person("Niels Bohr", 1885, 1962));
88+
persons.add(new Person("Richard Feynman", 1918, 1988));
89+
persons.add(new Person("Carl Sagan", 1934, 1996));
90+
persons.add(new Person("Francis Crick", 1916, 2004));
91+
persons.add(new Person("Rosalind Franklin", 1920, 1958));
92+
persons.add(new Person("Edwin Hubble", 1889, 1953));
93+
persons.add(new Person("Linus Pauling", 1901, 1994));
94+
persons.add(new Person("Alan Turing", 1912, 1954));
95+
persons.add(new Person("Guglielmo Marconi", 1874, 1937));
96+
persons.add(new Person("Michael Faraday", 1791, 1867));
97+
persons.add(new Person("Enrico Fermi", 1901, 1954));
98+
persons.add(new Person("Johannes Kepler", 1571, 1630));
99+
persons.add(new Person("Edwin Schrödinger", 1887, 1961));
100+
persons.add(new Person("Werner von Braun", 1912, 1977));
101+
persons.add(new Person("Albert Hofmann", 1906, 2008));
102+
persons.add(new Person("Robert Oppenheimer", 1904, 1967));
103+
persons.add(new Person("Edwin Land", 1909, 1991));
104+
persons.add(new Person("Rachel Carson", 1907, 1964));
105+
persons.add(new Person("Ernest Rutherford", 1871, 1937));
106+
persons.add(new Person("Hans Geiger", 1882, 1945));
107+
persons.add(new Person("John Bardeen", 1908, 1991));
108+
persons.add(new Person("George Washington Carver", 1864, 1943));
109+
return persons;
110+
}
111+
112+
private static void createCollection() {
113+
client.dropCollection(DropCollectionReq.builder()
114+
.collectionName(COLLECTION_NAME)
115+
.build());
116+
117+
// Create collection
118+
CreateCollectionReq.CollectionSchema collectionSchema = CreateCollectionReq.CollectionSchema.builder()
119+
.build();
120+
collectionSchema.addField(AddFieldReq.builder()
121+
.fieldName(NAME_FIELD)
122+
.dataType(DataType.VarChar)
123+
.isPrimaryKey(Boolean.TRUE)
124+
.maxLength(1024)
125+
.enableAnalyzer(true)
126+
.build());
127+
collectionSchema.addField(AddFieldReq.builder()
128+
.fieldName(BIRTH_YEAR_FIELD)
129+
.dataType(DataType.Int64)
130+
.build());
131+
collectionSchema.addField(AddFieldReq.builder()
132+
.fieldName(LIFESPAN_FIELD)
133+
.dataType(DataType.Int8)
134+
.build());
135+
collectionSchema.addField(AddFieldReq.builder()
136+
.fieldName(SPARSE_VECTOR_FIELD)
137+
.dataType(DataType.SparseFloatVector)
138+
.build());
139+
140+
collectionSchema.addFunction(CreateCollectionReq.Function.builder()
141+
.functionType(FunctionType.BM25)
142+
.name("function_bm25")
143+
.inputFieldNames(Collections.singletonList(NAME_FIELD))
144+
.outputFieldNames(Collections.singletonList(SPARSE_VECTOR_FIELD))
145+
.build());
146+
147+
List<IndexParam> indexes = new ArrayList<>();
148+
indexes.add(IndexParam.builder()
149+
.fieldName(SPARSE_VECTOR_FIELD)
150+
.indexType(IndexParam.IndexType.SPARSE_INVERTED_INDEX)
151+
.metricType(IndexParam.MetricType.BM25)
152+
.build());
153+
154+
CreateCollectionReq requestCreate = CreateCollectionReq.builder()
155+
.collectionName(COLLECTION_NAME)
156+
.collectionSchema(collectionSchema)
157+
.indexParams(indexes)
158+
.consistencyLevel(ConsistencyLevel.BOUNDED)
159+
.build();
160+
client.createCollection(requestCreate);
161+
System.out.println("Collection created");
162+
}
163+
164+
private static void insertData() {
165+
List<JsonObject> rows = new ArrayList<>();
166+
List<Person> data = genData();
167+
for (Person person : data) {
168+
JsonObject row = new JsonObject();
169+
row.addProperty(NAME_FIELD, person.name);
170+
row.addProperty(BIRTH_YEAR_FIELD, person.fromYear);
171+
row.addProperty(LIFESPAN_FIELD, person.toYear - person.fromYear);
172+
rows.add(row);
173+
}
174+
175+
client.insert(InsertReq.builder()
176+
.collectionName(COLLECTION_NAME)
177+
.data(rows)
178+
.build());
179+
printRowCount();
180+
}
181+
182+
private static void printRowCount() {
183+
// Get row count, set ConsistencyLevel.STRONG to sync the data to query node so that data is visible
184+
QueryResp countR = client.query(QueryReq.builder()
185+
.collectionName(COLLECTION_NAME)
186+
.outputFields(Collections.singletonList("count(*)"))
187+
.consistencyLevel(ConsistencyLevel.STRONG)
188+
.build());
189+
System.out.printf("%d rows persisted\n", (long)countR.getQueryResults().get(0).getEntity().get("count(*)"));
190+
}
191+
192+
private static void dropCollection() {
193+
client.dropCollection(DropCollectionReq.builder()
194+
.collectionName(COLLECTION_NAME)
195+
.build());
196+
System.out.println("Collection dropped");
197+
}
198+
199+
private static void searchWithRanker(String text, CreateCollectionReq.Function rankerFunction) {
200+
System.out.println("\n=============================================================");
201+
SearchReq.SearchReqBuilder builder = SearchReq.builder()
202+
.collectionName(COLLECTION_NAME)
203+
.data(Collections.singletonList(new EmbeddedText(text)))
204+
.limit(100)
205+
.outputFields(Arrays.asList(BIRTH_YEAR_FIELD, LIFESPAN_FIELD));
206+
207+
if (rankerFunction != null) {
208+
builder.functionScore(FunctionScore.builder()
209+
.addFunction(rankerFunction)
210+
.build());
211+
System.out.printf("Search text '%s' with ranker '%s'\n\n", text, rankerFunction.getName());
212+
} else {
213+
System.out.printf("Search text '%s' without ranker\n\n", text);
214+
}
215+
216+
// The text is tokenized inside the server and turned into a sparse embedding that is compared with the vector field
217+
SearchResp searchResp = client.search(builder.build());
218+
List<List<SearchResp.SearchResult>> searchResults = searchResp.getSearchResults();
219+
for (List<SearchResp.SearchResult> results : searchResults) {
220+
for (SearchResp.SearchResult result : results) {
221+
System.out.println(result);
222+
}
223+
}
224+
}
225+
226+
private static void searchWithoutRanker(String text) {
227+
searchWithRanker(text, null);
228+
}
229+
230+
public static void main(String[] args) {
231+
createCollection();
232+
insertData();
233+
234+
// Search scientists with name or surname
235+
String scientists = "Albert, Charles, Darwin and Edwin";
236+
searchWithoutRanker(scientists);
237+
238+
// Search scientists with name or surname
239+
// Rerank the results by linear decay, the scores are rearranged according to the birth years
240+
// Read the doc for more info: https://milvus.io/docs/decay-ranker-overview.md
241+
// The scientist whose birth year is close to 1900 will get a high score
242+
DecayRanker decay = DecayRanker.builder()
243+
.name("birth_year_linear_decay")
244+
.inputFieldNames(Collections.singletonList(BIRTH_YEAR_FIELD))
245+
.function("linear")
246+
.origin(1900)
247+
.scale(50)
248+
.offset(0)
249+
.decay(0.1)
250+
.build();
251+
searchWithRanker(scientists, decay);
252+
253+
// Search scientists with name or surname
254+
// Rerank the results by boost, the scores are rearranged according to the lifespan
255+
// Read the doc for more info: https://milvus.io/docs/boost-ranker.md
256+
// Scientists whose lifespan is greater than 60 and less than 70 (both exclusive) will get a boosted score
257+
BoostRanker boost = BoostRanker.builder()
258+
.name("boost_on_lifespan")
259+
.filter(String.format("%s > 60 and %s < 70", LIFESPAN_FIELD, LIFESPAN_FIELD))
260+
.weight(5.0f)
261+
.build();
262+
searchWithRanker(scientists, boost);
263+
264+
dropCollection();
265+
client.close();
266+
}
267+
}

0 commit comments

Comments
 (0)