Skip to content

Commit c25b088

Browse files
committed
refactor(vm): make constant-call throttle per-RPC, not per-VM-call
estimateEnergy and view-method dispatch invoke callConstantContract many times per request; the prior per-call tryAcquire/release made long estimates fragile under load. Track permit ownership via a static final ThreadLocal<Boolean> with no withInitial, set after a successful acquire and remove()d in finally so worker threads in the gRPC/HTTP pool leave no map entry behind.
1 parent d8252da commit c25b088

2 files changed

Lines changed: 115 additions & 12 deletions

File tree

framework/src/main/java/org/tron/core/Wallet.java

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -283,14 +283,48 @@ public class Wallet {
283283

284284
private static volatile Semaphore constantCallSemaphore;
285285

286+
/**
287+
* Reentrancy flag for the constant-call concurrency cap. Only the OUTERMOST
288+
* {@link #callConstantContract} frame on a given thread takes/releases a
289+
* permit; inner frames (e.g., the binary-search loop inside
290+
* {@link #estimateEnergy}, or {@link #triggerContract} dispatching to a
291+
* view/pure method) read this flag and skip the throttle, so a single
292+
* top-level RPC occupies one slot for its full lifetime regardless of how
293+
* many internal VM invocations it performs. Issue #6681.
294+
*
295+
* <p>Memory-safety:
296+
* <ul>
297+
* <li>The field is {@code static final}, so the {@code WeakReference} key
298+
* inside each worker thread's {@code ThreadLocalMap} stays live for
299+
* the JVM lifetime — no stale-key cleanup story to worry about.</li>
300+
* <li>No {@code withInitial(...)}: a thread that never enters the
301+
* throttled path never has an entry created in its
302+
* {@code ThreadLocalMap}. {@link ThreadLocal#get() get()} returns
303+
* {@code null} for those threads.</li>
304+
* <li>The value is always {@code Boolean.TRUE} (a JVM-cached singleton).
305+
* The outermost frame {@link ThreadLocal#remove() remove()}s the
306+
* entry in {@code finally} so the map slot is reclaimed before the
307+
* worker returns to the gRPC/HTTP pool.</li>
308+
* </ul>
309+
*
310+
* <p>Correctness depends on the codebase's single-thread-per-request
311+
* invariant for both gRPC sync handlers (see {@code RpcService} which wires
312+
* a {@code newFixedThreadPool} into {@code NettyServerBuilder.executor}) and
313+
* the HTTP servlet handlers. Any future refactor that offloads VM work to
314+
* a different thread mid-request must re-evaluate this guard.
315+
*/
316+
private static final ThreadLocal<Boolean> CONSTANT_CALL_PERMIT_HELD =
317+
new ThreadLocal<>();
318+
286319
private static Semaphore getConstantCallSemaphore() {
287320
Semaphore s = constantCallSemaphore;
288321
if (s == null) {
289322
synchronized (Wallet.class) {
290323
s = constantCallSemaphore;
291324
if (s == null) {
292-
s = new Semaphore(
293-
CommonParameter.getInstance().getConstantCallMaxConcurrency(), true);
325+
// Non-fair: tryAcquire() (no-arg) bypasses the fair queue regardless
326+
// of this flag, so requesting fairness here is misleading.
327+
s = new Semaphore(CommonParameter.getInstance().getConstantCallMaxConcurrency());
294328
constantCallSemaphore = s;
295329
}
296330
}
@@ -3177,15 +3211,31 @@ public Transaction callConstantContract(TransactionCapsule trxCap,
31773211
throw new ContractValidateException("this node does not support constant");
31783212
}
31793213

3180-
long networkMaxCpuTimeOfOneTxMs = chainBaseManager.getDynamicPropertiesStore()
3181-
.getMaxCpuTimeOfOneTx();
3182-
boolean throttled = VmTimeoutPolicy.isCapEngaged(
3183-
CommonParameter.getInstance().getConstantCallTimeoutMs(), networkMaxCpuTimeOfOneTxMs);
3184-
Semaphore sem = throttled ? getConstantCallSemaphore() : null;
3185-
if (sem != null && !sem.tryAcquire()) {
3186-
throw new ContractValidateException(
3187-
"too many concurrent constant calls; configured cap is "
3188-
+ CommonParameter.getInstance().getConstantCallMaxConcurrency());
3214+
// Reentrancy guard: a single top-level RPC may invoke this method many
3215+
// times on the same thread — estimateEnergy's binary search runs ~25-30
3216+
// iterations, and triggerContract dispatches view/pure functions through
3217+
// here. Only the OUTERMOST frame takes a permit; inner frames see the
3218+
// ThreadLocal flag and skip the throttle so per-RPC concurrency, not
3219+
// per-VM-call concurrency, is what we cap. Issue #6681.
3220+
Semaphore sem = null;
3221+
boolean acquiredHere = false;
3222+
if (!Boolean.TRUE.equals(CONSTANT_CALL_PERMIT_HELD.get())) {
3223+
long networkMaxCpuTimeOfOneTxMs = chainBaseManager.getDynamicPropertiesStore()
3224+
.getMaxCpuTimeOfOneTx();
3225+
if (VmTimeoutPolicy.isCapEngaged(
3226+
CommonParameter.getInstance().getConstantCallTimeoutMs(),
3227+
networkMaxCpuTimeOfOneTxMs)) {
3228+
sem = getConstantCallSemaphore();
3229+
if (!sem.tryAcquire()) {
3230+
// Throw before set(TRUE) so a failed acquire never pollutes the
3231+
// ThreadLocalMap.
3232+
throw new ContractValidateException(
3233+
"too many concurrent constant calls; configured cap is "
3234+
+ CommonParameter.getInstance().getConstantCallMaxConcurrency());
3235+
}
3236+
CONSTANT_CALL_PERMIT_HELD.set(Boolean.TRUE);
3237+
acquiredHere = true;
3238+
}
31893239
}
31903240
try {
31913241
Block headBlock;
@@ -3236,7 +3286,11 @@ public Transaction callConstantContract(TransactionCapsule trxCap,
32363286
trxCap.setResult(ret);
32373287
return trxCap.getInstance();
32383288
} finally {
3239-
if (sem != null) {
3289+
if (acquiredHere) {
3290+
// remove() before release(): clears the slot from this worker's
3291+
// ThreadLocalMap so it goes back to the pool clean. Required because
3292+
// the field has no withInitial — only set/remove pairs touch the map.
3293+
CONSTANT_CALL_PERMIT_HELD.remove();
32403294
sem.release();
32413295
}
32423296
}

framework/src/test/java/org/tron/core/WalletConstantCallThrottleTest.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
import static org.junit.Assert.assertEquals;
44
import static org.junit.Assert.assertFalse;
5+
import static org.junit.Assert.assertNull;
56
import static org.junit.Assert.assertSame;
67
import static org.junit.Assert.assertTrue;
78

89
import java.lang.reflect.Field;
910
import java.lang.reflect.Method;
11+
import java.lang.reflect.Modifier;
1012
import java.util.concurrent.Semaphore;
1113

1214
import org.junit.After;
@@ -25,12 +27,15 @@
2527
public class WalletConstantCallThrottleTest {
2628

2729
private static final Field SEMAPHORE_FIELD;
30+
private static final Field PERMIT_HELD_FIELD;
2831
private static final Method GET_SEMAPHORE_METHOD;
2932

3033
static {
3134
try {
3235
SEMAPHORE_FIELD = Wallet.class.getDeclaredField("constantCallSemaphore");
3336
SEMAPHORE_FIELD.setAccessible(true);
37+
PERMIT_HELD_FIELD = Wallet.class.getDeclaredField("CONSTANT_CALL_PERMIT_HELD");
38+
PERMIT_HELD_FIELD.setAccessible(true);
3439
GET_SEMAPHORE_METHOD = Wallet.class.getDeclaredMethod("getConstantCallSemaphore");
3540
GET_SEMAPHORE_METHOD.setAccessible(true);
3641
} catch (NoSuchFieldException | NoSuchMethodException e) {
@@ -103,4 +108,48 @@ public void semaphoreSaturatesAtConfiguredCap() throws Exception {
103108
sem.release();
104109
sem.release();
105110
}
111+
112+
// ===========================================================================
113+
// ThreadLocal reentrancy guard (issue #6681). The flag lets a single
114+
// top-level RPC (estimateEnergy's binary search, triggerContract dispatching
115+
// a view/pure function, etc.) hold ONE permit across many internal
116+
// callConstantContract invocations. These tests pin the memory-safety
117+
// invariants the implementation relies on.
118+
// ===========================================================================
119+
120+
@Test
121+
public void permitFlagFieldIsStaticFinal() {
122+
int mods = PERMIT_HELD_FIELD.getModifiers();
123+
// static: one ThreadLocal instance per JVM, key never goes stale in any
124+
// worker's ThreadLocalMap.
125+
assertTrue("CONSTANT_CALL_PERMIT_HELD must be static",
126+
Modifier.isStatic(mods));
127+
// final: the WeakReference key inside each worker's ThreadLocalMap stays
128+
// valid for the JVM lifetime, so there is no "key collected, value
129+
// orphaned" leak path.
130+
assertTrue("CONSTANT_CALL_PERMIT_HELD must be final",
131+
Modifier.isFinal(mods));
132+
}
133+
134+
@Test
135+
@SuppressWarnings("unchecked")
136+
public void permitFlagIsLazyAndCleansUpAfterRemove() throws Exception {
137+
ThreadLocal<Boolean> tl = (ThreadLocal<Boolean>) PERMIT_HELD_FIELD.get(null);
138+
139+
// No withInitial(...) — fresh state must read null. Threads that never
140+
// enter the throttled path leave no entry behind in their ThreadLocalMap.
141+
assertNull("ThreadLocal must not pre-populate; null = not held", tl.get());
142+
143+
try {
144+
tl.set(Boolean.TRUE);
145+
assertEquals(Boolean.TRUE, tl.get());
146+
} finally {
147+
tl.remove();
148+
}
149+
150+
// After remove(), get() must return null again — guarantees that the
151+
// worker thread's ThreadLocalMap slot is reclaimed, not left holding a
152+
// stale Boolean reference across requests.
153+
assertNull("after remove(), get() must return null again", tl.get());
154+
}
106155
}

0 commit comments

Comments
 (0)