3333public class RpcUtils {
3434
3535 protected static final Logger logger = LoggerFactory .getLogger (RpcUtils .class );
36+ private static final String GLOBAL_ROUTING_ERROR = "STREAMING_CODE_REPLICATE_VIOLATION" ;
3637 private RetryConfig retryConfig = RetryConfig .builder ().build ();
3738 private Runnable globalRefreshTrigger ;
3839
@@ -44,6 +45,37 @@ public void setGlobalRefreshTrigger(Runnable trigger) {
4445 this .globalRefreshTrigger = trigger ;
4546 }
4647
48+ private void handleGlobalConnectionError (StatusRuntimeException e ) {
49+ if (globalRefreshTrigger == null ) {
50+ return ;
51+ }
52+ if (e .getStatus ().getCode () == io .grpc .Status .UNAVAILABLE .getCode ()) {
53+ logger .info ("Connection unavailable, triggering global topology refresh: {}" , e .getMessage ());
54+ try {
55+ globalRefreshTrigger .run ();
56+ } catch (Exception ex ) {
57+ logger .warn ("Failed to trigger global topology refresh: {}" , ex .getMessage ());
58+ }
59+ }
60+ }
61+
62+ private boolean handleGlobalRoutingError (Exception e ) {
63+ if (globalRefreshTrigger == null ) {
64+ return false ;
65+ }
66+ String message = e .getMessage ();
67+ if (message != null && message .contains (GLOBAL_ROUTING_ERROR )) {
68+ logger .info ("Detected {}, triggering global topology refresh" , GLOBAL_ROUTING_ERROR );
69+ try {
70+ globalRefreshTrigger .run ();
71+ } catch (Exception ex ) {
72+ logger .warn ("Failed to trigger global topology refresh: {}" , ex .getMessage ());
73+ }
74+ return true ;
75+ }
76+ return false ;
77+ }
78+
4779 public void handleResponse (String requestInfo , Status status ) {
4880 // the server made a change for error code:
4981 // for 2.2.x, error code is status.getErrorCode()
@@ -119,14 +151,8 @@ public <T> T retry(Callable<T> callable) {
119151 throw new MilvusClientException (ErrorCode .RPC_ERROR , msg ); // throw rpc error
120152 }
121153
122- // For UNAVAILABLE errors, trigger global topology refresh if configured
123- if (code == io .grpc .Status .UNAVAILABLE .getCode () && globalRefreshTrigger != null ) {
124- try {
125- globalRefreshTrigger .run ();
126- } catch (Exception ex ) {
127- logger .warn ("Failed to trigger global topology refresh: {}" , ex .getMessage ());
128- }
129- }
154+ // trigger topology refresh if connection is unavailable, and continue to retry
155+ handleGlobalConnectionError (e );
130156
131157 try {
132158 if (timeoutChecker .call () == Boolean .TRUE ) {
@@ -148,12 +174,13 @@ public <T> T retry(Callable<T> callable) {
148174 } catch (Exception ignored ) {
149175 }
150176
151- // for server-side returned error, only retry for rate limit
152- // in new error codes of v2.3, rate limit error value is 8
153177 if (retryConfig .isRetryOnRateLimit () &&
154178 (e .getLegacyServerCode () == io .milvus .grpc .ErrorCode .RateLimit .getNumber () ||
155179 e .getServerErrCode () == 8 )) {
156- // cannot be retried
180+ // for server-side returned error, only retry for rate limit
181+ // in new error codes of v2.3, rate limit error value is 8
182+ } else if (handleGlobalRoutingError (e )) {
183+ // for global cluster routing errors, immediately trigger topology refresh and continue to retry
157184 } else {
158185 throw e ; // exit retry, throw the error
159186 }
0 commit comments