Skip to content

Commit 0f40c3c

Browse files
authored
Support runAnalyzer() interface (#1395)
Signed-off-by: yhmo <yihua.mo@zilliz.com>
1 parent 52dd500 commit 0f40c3c

8 files changed

Lines changed: 235 additions & 12 deletions

File tree

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ services:
7777

7878
standaloneslave:
7979
container_name: milvus-javasdk-test-slave-standalone
80-
image: milvusdb/milvus:v2.5.8
80+
image: milvusdb/milvus:v2.5.11
8181
command: ["milvus", "run", "standalone"]
8282
environment:
8383
ETCD_ENDPOINTS: etcdslave:2379

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,13 @@
9999
<maven.deploy.plugin.version>3.1.3</maven.deploy.plugin.version>
100100
<junit.platform.version>1.1.0</junit.platform.version>
101101
<junit.jupiter.engine.version>5.10.1</junit.jupiter.engine.version>
102-
<gson.version>2.10.1</gson.version>
102+
<gson.version>2.13.1</gson.version>
103103
<kotlin.version>1.9.10</kotlin.version>
104104
<mockito.version>4.11.0</mockito.version>
105105
<testcontainers.version>1.19.8</testcontainers.version>
106106
<apache.commons.pool2.version>2.12.0</apache.commons.pool2.version>
107107
<guava.version>32.1.3-jre</guava.version>
108-
<errorprone.version>2.21.1</errorprone.version>
108+
<errorprone.version>2.38.0</errorprone.version>
109109

110110
<!--for BulkWriter-->
111111
<plexus.version>3.0.24</plexus.version>

sdk-core/src/main/java/io/milvus/v2/client/MilvusClientV2.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ public Boolean getLoadState(GetLoadStateReq request) {
382382
/**
383383
* Get information of all replicas from a collection.
384384
*
385-
* @param request {@link DescribeReplicasReq}
385+
* @param request describe replicas request
386386
*/
387387
public DescribeReplicasResp describeReplicas(DescribeReplicasReq request) {
388388
return rpcUtils.retry(()->collectionService.describeReplicas(this.getRpcStub(), request));
@@ -528,8 +528,8 @@ public SearchResp hybridSearch(HybridSearchReq request) {
528528
* Get queryIterator based on scalar field(s) filtered by boolean expression.
529529
* Note that the order of the returned entities cannot be guaranteed.
530530
*
531-
* @param request {@link QueryIteratorReq}
532-
* @return {status:result code,data: QueryIterator}
531+
* @param request query iterator request
532+
* @return QueryIterator
533533
*/
534534
public QueryIterator queryIterator(QueryIteratorReq request) {
535535
return rpcUtils.retry(()->vectorService.queryIterator(this.getRpcStub(), request));
@@ -538,8 +538,8 @@ public QueryIterator queryIterator(QueryIteratorReq request) {
538538
/**
539539
* Get searchIterator based on a vector field. Use expression to do filtering before search.
540540
*
541-
* @param request {@link SearchIteratorReq}
542-
* @return {status:result code, data: SearchIterator}
541+
* @param request search iterator request
542+
* @return SearchIterator
543543
*/
544544
public SearchIterator searchIterator(SearchIteratorReq request) {
545545
return rpcUtils.retry(()->vectorService.searchIterator(this.getRpcStub(), request));
@@ -548,13 +548,24 @@ public SearchIterator searchIterator(SearchIteratorReq request) {
548548
/**
549549
* Get searchIteratorV2 based on a vector field. Use expression to do filtering before search.
550550
*
551-
* @param request {@link SearchIteratorReqV2}
552-
* @return {status:result code, data: SearchIteratorV2}
551+
* @param request search iterator request V2
552+
* @return SearchIteratorV2
553553
*/
554554
public SearchIteratorV2 searchIteratorV2(SearchIteratorReqV2 request) {
555555
return rpcUtils.retry(()->vectorService.searchIteratorV2(this.getRpcStub(), request));
556556
}
557557

558+
/**
559+
* Run analyzer. Return result tokens of analysis.
560+
* Milvus server supports this interface from v2.5.11
561+
*
562+
* @param request run analyzer request
563+
* @return RunAnalyzerResp
564+
*/
565+
public RunAnalyzerResp runAnalyzer(RunAnalyzerReq request) {
566+
return rpcUtils.retry(()->vectorService.runAnalyzer(this.getRpcStub(), request));
567+
}
568+
558569
/////////////////////////////////////////////////////////////////////////////////////////////
559570
// Partition Operations
560571
/////////////////////////////////////////////////////////////////////////////////////////////

sdk-core/src/main/java/io/milvus/v2/service/vector/VectorService.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919

2020
package io.milvus.v2.service.vector;
2121

22+
import com.google.protobuf.ByteString;
2223
import io.milvus.common.utils.GTsDict;
24+
import io.milvus.common.utils.JsonUtils;
2325
import io.milvus.exception.ParamException;
2426
import io.milvus.grpc.*;
2527
import io.milvus.orm.iterator.*;
@@ -303,4 +305,51 @@ public GetResp get(MilvusServiceGrpc.MilvusServiceBlockingStub blockingStub, Get
303305
.getResults(queryResp.getQueryResults())
304306
.build();
305307
}
308+
309+
public RunAnalyzerResp runAnalyzer(MilvusServiceGrpc.MilvusServiceBlockingStub blockingStub, RunAnalyzerReq request) {
310+
String title = "RunAnalyzer";
311+
if (request.getTexts().isEmpty()) {
312+
throw new MilvusClientException(ErrorCode.INVALID_PARAMS, "Texts list is empty.");
313+
}
314+
315+
RunAnalyzerRequest.Builder builder = RunAnalyzerRequest.newBuilder();
316+
List<ByteString> byteStrings = new ArrayList<>();
317+
for (String text : request.getTexts()) {
318+
byteStrings.add(ByteString.copyFrom(text.getBytes()));
319+
}
320+
321+
String params = JsonUtils.toJson(request.getAnalyzerParams());
322+
System.out.println(params);
323+
RunAnalyzerRequest runRequest = builder.addAllPlaceholder(byteStrings)
324+
.setAnalyzerParams(params)
325+
.setWithDetail(request.getWithDetail())
326+
.setWithHash(request.getWithHash())
327+
.build();
328+
RunAnalyzerResponse response = blockingStub.runAnalyzer(runRequest);
329+
rpcUtils.handleResponse(title, response.getStatus());
330+
331+
List<RunAnalyzerResp.AnalyzerResult> toResults = new ArrayList<>();
332+
List<AnalyzerResult> results = response.getResultsList();
333+
results.forEach((item)->{
334+
List<RunAnalyzerResp.AnalyzerToken> toTokens = new ArrayList<>();
335+
List<AnalyzerToken> tokens = item.getTokensList();
336+
tokens.forEach((token)->{
337+
toTokens.add(RunAnalyzerResp.AnalyzerToken.builder()
338+
.token(token.getToken())
339+
.startOffset(token.getStartOffset())
340+
.endOffset(token.getEndOffset())
341+
.position(token.getPosition())
342+
.positionLength(token.getPositionLength())
343+
.hash(token.getHash() & 0xFFFFFFFFL)
344+
.build());
345+
});
346+
toResults.add(RunAnalyzerResp.AnalyzerResult.builder()
347+
.tokens(toTokens)
348+
.build());
349+
});
350+
351+
return RunAnalyzerResp.builder()
352+
.results(toResults)
353+
.build();
354+
}
306355
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package io.milvus.v2.service.vector.request;
21+
22+
import lombok.Builder;
23+
import lombok.Data;
24+
import lombok.experimental.SuperBuilder;
25+
26+
import java.util.*;
27+
28+
@Data
29+
@SuperBuilder
30+
public class RunAnalyzerReq {
31+
@Builder.Default
32+
private List<String> texts = new ArrayList<>();
33+
@Builder.Default
34+
private Map<String, Object> analyzerParams = new HashMap<>();
35+
@Builder.Default
36+
private Boolean withDetail = Boolean.FALSE;
37+
@Builder.Default
38+
private Boolean withHash = Boolean.FALSE;
39+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package io.milvus.v2.service.vector.response;
21+
22+
import lombok.Builder;
23+
import lombok.Data;
24+
import lombok.experimental.SuperBuilder;
25+
26+
import java.util.ArrayList;
27+
import java.util.List;
28+
29+
@Data
30+
@SuperBuilder
31+
public class RunAnalyzerResp {
32+
@Builder.Default
33+
List<AnalyzerResult> results = new ArrayList<>();
34+
35+
@Data
36+
@SuperBuilder
37+
public static final class AnalyzerResult {
38+
@Builder.Default
39+
List<AnalyzerToken> tokens = new ArrayList<>();
40+
}
41+
42+
@Data
43+
@SuperBuilder
44+
public static final class AnalyzerToken {
45+
private String token;
46+
private Long startOffset;
47+
private Long endOffset;
48+
private Long position;
49+
private Long positionLength;
50+
private Long hash;
51+
}
52+
}

sdk-core/src/test/java/io/milvus/client/MilvusClientDockerTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class MilvusClientDockerTest {
7575
private static final TestUtils utils = new TestUtils(DIMENSION);
7676

7777
@Container
78-
private static final MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.8");
78+
private static final MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.11");
7979

8080
@BeforeAll
8181
public static void setUp() {

sdk-core/src/test/java/io/milvus/v2/client/MilvusClientV2DockerTest.java

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class MilvusClientV2DockerTest {
8181
private static final TestUtils utils = new TestUtils(DIMENSION);
8282

8383
@Container
84-
private static final MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.8");
84+
private static final MilvusContainer milvus = new MilvusContainer("milvusdb/milvus:v2.5.11");
8585

8686
@BeforeAll
8787
public static void setUp() {
@@ -2379,4 +2379,76 @@ void testReplica() {
23792379
Assertions.assertFalse(replica.getLeaderAddress().isEmpty());
23802380
Assertions.assertNotEquals(0L, replica.getLeaderID());
23812381
}
2382+
2383+
@Test
2384+
void testRunAnalyzer() {
2385+
List<String> texts = new ArrayList<>();
2386+
texts.add("Analyzers (tokenizers) for multi languages");
2387+
texts.add("2.5 to take advantage of enhancements and fixes!");
2388+
2389+
Map<String, Object> analyzerParams = new HashMap<>();
2390+
analyzerParams.put("tokenizer", "standard");
2391+
analyzerParams.put("filter",
2392+
Arrays.asList("lowercase",
2393+
new HashMap<String, Object>() {{
2394+
put("type", "stop");
2395+
put("stop_words", Arrays.asList("to", "of", "for", "the"));
2396+
}}));
2397+
2398+
RunAnalyzerResp resp = client.runAnalyzer(RunAnalyzerReq.builder()
2399+
.texts(texts)
2400+
.analyzerParams(analyzerParams)
2401+
.withDetail(true)
2402+
.withHash(true)
2403+
.build());
2404+
2405+
List<RunAnalyzerResp.AnalyzerResult> results = resp.getResults();
2406+
Assertions.assertEquals(texts.size(), results.size());
2407+
2408+
{
2409+
List<String> tokens1 = Arrays.asList("analyzers", "tokenizers", "multi", "languages");
2410+
List<Long> startOffset1 = Arrays.asList(0L, 11L, 27L, 33L);
2411+
List<Long> endOffset1 = Arrays.asList(9L, 21L, 32L, 42L);
2412+
List<Long> position1 = Arrays.asList(0L, 1L, 3L, 4L);
2413+
List<Long> positionLen1 = Arrays.asList(1L, 1L, 1L, 1L);
2414+
List<Long> hash1 = Arrays.asList(1356745679L, 4089107865L, 3314631429L, 2698072953L);
2415+
2416+
List<RunAnalyzerResp.AnalyzerToken> outTokens1 = results.get(0).getTokens();
2417+
System.out.printf("%d tokens%n", outTokens1.size());
2418+
Assertions.assertEquals(tokens1.size(), outTokens1.size());
2419+
for (int i = 0; i < outTokens1.size(); i++) {
2420+
RunAnalyzerResp.AnalyzerToken token = outTokens1.get(i);
2421+
System.out.println(token);
2422+
Assertions.assertEquals(tokens1.get(i), token.getToken());
2423+
Assertions.assertEquals(startOffset1.get(i), token.getStartOffset());
2424+
Assertions.assertEquals(endOffset1.get(i), token.getEndOffset());
2425+
Assertions.assertEquals(position1.get(i), token.getPosition());
2426+
Assertions.assertEquals(positionLen1.get(i), token.getPositionLength());
2427+
Assertions.assertEquals(hash1.get(i), token.getHash());
2428+
}
2429+
}
2430+
2431+
{
2432+
List<String> tokens2 = Arrays.asList("2", "5", "take", "advantage", "enhancements", "and", "fixes");
2433+
List<Long> startOffset2 = Arrays.asList(0L, 2L, 7L, 12L, 25L, 38L, 42L);
2434+
List<Long> endOffset2 = Arrays.asList(1L, 3L, 11L, 21L, 37L, 41L, 47L);
2435+
List<Long> position2 = Arrays.asList(0L, 1L, 3L, 4L, 6L, 7L, 8L);
2436+
List<Long> positionLen2 = Arrays.asList(1L, 1L, 1L, 1L, 1L, 1L, 1L);
2437+
List<Long> hash2 = Arrays.asList(450215437L, 2226203566L, 937258619L, 697180577L, 3403941281L, 133536621L, 488262645L);
2438+
2439+
List<RunAnalyzerResp.AnalyzerToken> outTokens2 = results.get(1).getTokens();
2440+
System.out.printf("%d tokens%n", outTokens2.size());
2441+
Assertions.assertEquals(tokens2.size(), outTokens2.size());
2442+
for (int i = 0; i < outTokens2.size(); i++) {
2443+
RunAnalyzerResp.AnalyzerToken token = outTokens2.get(i);
2444+
System.out.println(token);
2445+
Assertions.assertEquals(tokens2.get(i), token.getToken());
2446+
Assertions.assertEquals(startOffset2.get(i), token.getStartOffset());
2447+
Assertions.assertEquals(endOffset2.get(i), token.getEndOffset());
2448+
Assertions.assertEquals(position2.get(i), token.getPosition());
2449+
Assertions.assertEquals(positionLen2.get(i), token.getPositionLength());
2450+
Assertions.assertEquals(hash2.get(i), token.getHash());
2451+
}
2452+
}
2453+
}
23822454
}

0 commit comments

Comments
 (0)