Skip to content

Commit fc3b3e9

Browse files
committed
use master refreshing interval; use runtimeMaxWait in batch execution
1 parent 11aa780 commit fc3b3e9

3 files changed

Lines changed: 41 additions & 29 deletions

File tree

src/main/java/com/alipay/oceanbase/rpc/location/model/TableLocations.java

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,12 @@ public TableEntry refreshMeta(String tableName, ServerRoster serverRoster,
9595
long runtimeMaxWait = tableClient.getRuntimeMaxWait();
9696
int tableEntryRefreshContinuousFailureCeiling = tableClient
9797
.getTableEntryRefreshContinuousFailureCeiling();
98+
long tableEntryRefreshIntervalBase = tableClient.getTableEntryRefreshIntervalBase();
99+
long tableEntryRefreshIntervalCeiling = tableClient.getTableEntryRefreshIntervalCeiling();
98100
long tableEntryRefreshLockTimeout = tableClient.getTableEntryRefreshLockTimeout();
99-
long refreshMetaInterval = 100L;
101+
long refreshMetaInterval = (long) (tableEntryRefreshIntervalBase * Math.pow(2,
102+
-serverRoster.getMaxPriority()));
103+
refreshMetaInterval = Math.min(refreshMetaInterval, tableEntryRefreshIntervalCeiling);
100104

101105
TableEntry tableEntry = locations.get(tableName);
102106
// avoid heavy contention under high concurrency
@@ -288,7 +292,7 @@ public TableEntry refreshPartitionLocation(TableEntry tableEntry, String tableNa
288292
long runtimeMaxWait = tableClient.getRuntimeMaxWait();
289293
long tableEntryRefreshLockTimeout = tableClient.getTableEntryRefreshLockTimeout();
290294
long lastRefreshTime = locationInfo.getLastUpdateTime();
291-
long tableEntryRefreshInterval = 100L;
295+
long tableEntryRefreshInterval = tableClient.getTableEntryRefreshIntervalCeiling();
292296
long currentTime = System.currentTimeMillis();
293297
// do not refresh tablet location if it was already refreshed within the table entry refresh interval (milliseconds)
294298
if (currentTime - lastRefreshTime < tableEntryRefreshInterval) {
@@ -421,14 +425,18 @@ public TableEntry refreshTabletLocationBatch(TableEntry tableEntry, String table
421425
long runtimeMaxWait = tableClient.getRuntimeMaxWait();
422426
long tableEntryRefreshLockTimeout = tableClient.getTableEntryRefreshLockTimeout();
423427
long lastRefreshTime = tableEntry.getPartitionEntry().getLastRefreshAllTime();
424-
long tableEntryRefreshInterval = 100L;
428+
long tableEntryRefreshIntervalBase = tableClient.getTableEntryRefreshIntervalBase();
429+
long tableEntryRefreshIntervalCeiling = tableClient.getTableEntryRefreshIntervalCeiling();
430+
long refreshBatchTabletInterval = (long) (tableEntryRefreshIntervalBase * Math.pow(2,
431+
-serverRoster.getMaxPriority()));
432+
refreshBatchTabletInterval = Math.min(refreshBatchTabletInterval, tableEntryRefreshIntervalCeiling);
425433
long currentTime = System.currentTimeMillis();
426434
// do not refresh tablet locations in batch if they were already refreshed within the refresh interval (milliseconds)
427-
if (currentTime - lastRefreshTime < tableEntryRefreshInterval) {
435+
if (currentTime - lastRefreshTime < refreshBatchTabletInterval) {
428436
logger
429437
.info(
430438
"punish table entry {}, last batch location refresh time {}, punish interval {}, current time {}.",
431-
tableName, lastRefreshTime, tableEntryRefreshInterval, currentTime);
439+
tableName, lastRefreshTime, refreshBatchTabletInterval, currentTime);
432440
return tableEntry;
433441
}
434442
Lock lock = getLocationBatchRefreshLock(tableName);
@@ -463,11 +471,11 @@ public TableEntry refreshTabletLocationBatch(TableEntry tableEntry, String table
463471
logger.warn("[latency monitor] success to acquire refresh tablet locations in batch lock, tableName: {}", tableName);
464472
lastRefreshTime = tableEntry.getPartitionEntry().getLastRefreshAllTime();
465473
currentTime = System.currentTimeMillis();
466-
if (currentTime - lastRefreshTime < tableEntryRefreshInterval) {
474+
if (currentTime - lastRefreshTime < refreshBatchTabletInterval) {
467475
logger
468476
.info(
469477
"punish table entry {}, last batch location refresh time {}, punish interval {}, current time {}.",
470-
tableName, lastRefreshTime, tableEntryRefreshInterval, currentTime);
478+
tableName, lastRefreshTime, refreshBatchTabletInterval, currentTime);
471479
return tableEntry;
472480
}
473481
logger.warn("[latency monitor] do refresh tablet locations in batch lock, tableName: {}", tableName);

src/main/java/com/alipay/oceanbase/rpc/table/ObTableClientBatchOpsImpl.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -541,14 +541,23 @@ private boolean shouldRetry(Throwable throwable) {
541541

542542
private void executeWithRetries(ObTableOperationResult[] results, Map.Entry<Long, ObPair<ObTableParam, List<ObPair<Integer, ObTableOperation>>>> entry) throws Exception {
543543
int retryCount = 0;
544-
boolean success = false;
545544

546545
Map<Long, ObPair<ObTableParam, List<ObPair<Integer, ObTableOperation>>>> currentPartitions = new HashMap<>();
547546
currentPartitions.put(entry.getKey(), entry.getValue());
548547
int errCode = ResultCodes.OB_SUCCESS.errorCode;
549548
String errMsg = null;
550-
int maxRetryTimes = obTableClient.getRuntimeRetryTimes();
551-
while (retryCount < maxRetryTimes && !success) {
549+
long runTimeMaxWait = obTableClient.getRuntimeMaxWait();
550+
long startExecute = System.currentTimeMillis();
551+
while (true) {
552+
long costMillis = System.currentTimeMillis() - startExecute;
553+
if (costMillis > runTimeMaxWait) {
554+
errMsg = tableName + " failed to execute operation after retrying " + retryCount
555+
+ " times and it has waited" + costMillis + " ms"
556+
+ " which exceeds runtime max wait timeout " + runTimeMaxWait
557+
+ " ms. Last error Msg:" + "[errCode=" + errCode + "] " + errMsg;
558+
logger.error(errMsg);
559+
throw new ObTableUnexpectedException(errMsg);
560+
}
552561
boolean allPartitionsSuccess = true;
553562
for (Map.Entry<Long, ObPair<ObTableParam, List<ObPair<Integer, ObTableOperation>>>> currentEntry : currentPartitions.entrySet()) {
554563
try {
@@ -569,15 +578,9 @@ private void executeWithRetries(ObTableOperationResult[] results, Map.Entry<Long
569578
}
570579

571580
if (allPartitionsSuccess) {
572-
success = true;
581+
break;
573582
}
574583
}
575-
576-
if (!success) {
577-
errMsg = "Failed to execute operation after retrying " + retryCount + " times. Last error Msg:" +
578-
"[errCode="+ errCode +"] " + errMsg;
579-
throw new ObTableUnexpectedException(errMsg);
580-
}
581584
}
582585

583586
public ObTableBatchOperationResult executeInternal() throws Exception {

src/main/java/com/alipay/oceanbase/rpc/table/ObTableClientLSBatchOpsImpl.java

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -827,17 +827,23 @@ private void executeWithRetries(ObTableSingleOpResult[] results,
827827
Map.Entry<Long, TabletOperationsMap> entry) throws Exception {
828828

829829
int retryCount = 0;
830-
boolean success = false;
831830

832831
LsOperationsMap currentPartitions = new LsOperationsMap();
833832
currentPartitions.put(entry.getKey(), entry.getValue());
834833
int errCode = ResultCodes.OB_SUCCESS.errorCode;
835834
String errMsg = null;
836-
int maxRetryTimes = obTableClient.getRuntimeRetryTimes();
837-
// cannot use runTimeWait to retry to timeout
838-
// because in huge-partitioned table situation, if refresh all tablets' locations
839-
// it's hard to complete refreshing within time limit
840-
while (retryCount < maxRetryTimes && !success) {
835+
long runTimeMaxWait = obTableClient.getRuntimeMaxWait();
836+
long startExecute = System.currentTimeMillis();
837+
while (true) {
838+
long costMillis = System.currentTimeMillis() - startExecute;
839+
if (costMillis > runTimeMaxWait) {
840+
errMsg = tableName + " failed to execute operation after retrying " + retryCount
841+
+ " times and it has waited" + costMillis + " ms"
842+
+ " which exceeds runtime max wait timeout " + runTimeMaxWait
843+
+ " ms. Last error Msg:" + "[errCode=" + errCode + "] " + errMsg;
844+
logger.error(errMsg);
845+
throw new ObTableUnexpectedException(errMsg);
846+
}
841847
boolean allPartitionsSuccess = true;
842848

843849
for (Map.Entry<Long, TabletOperationsMap> currentEntry : currentPartitions.entrySet()) {
@@ -861,14 +867,9 @@ private void executeWithRetries(ObTableSingleOpResult[] results,
861867
}
862868

863869
if (allPartitionsSuccess) {
864-
success = true;
870+
break;
865871
}
866872
}
867-
if (!success) {
868-
errMsg = "Failed to execute operation after retrying " + retryCount
869-
+ " times. Last error Msg:" + "[errCode=" + errCode + "] " + errMsg;
870-
throw new ObTableUnexpectedException(errMsg);
871-
}
872873
}
873874

874875
/*

0 commit comments

Comments
 (0)