Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.apache.hugegraph.define.WorkLoad;
import org.apache.hugegraph.util.Bytes;
import org.apache.hugegraph.util.E;
import org.apache.hugegraph.util.Log;
import org.slf4j.Logger;

import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.RateLimiter;
Expand All @@ -43,6 +45,8 @@
@PreMatching
public class LoadDetectFilter implements ContainerRequestFilter {

private static final Logger LOG = Log.logger(LoadDetectFilter.class);

private static final Set<String> WHITE_API_LIST = ImmutableSet.of(
"",
"apis",
Expand All @@ -54,11 +58,40 @@ public class LoadDetectFilter implements ContainerRequestFilter {
private static final RateLimiter GC_RATE_LIMITER =
RateLimiter.create(1.0 / 30);

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ 两条拒绝路径共享同一个 REJECT_LOG_RATE_LIMITER

高负载拒绝和低内存拒绝共享同一个 RateLimiter(每秒 1 个 permit)。这意味着:

  • 如果高负载拒绝消耗了 permit,紧接着的低内存拒绝就不会被记录(反之亦然)
  • 运维人员在同时出现高负载和低内存时,可能只看到一种告警,遗漏另一种

建议为两种拒绝原因使用独立的 RateLimiter:

private static final RateLimiter BUSY_LOG_LIMITER = RateLimiter.create(1.0);
private static final RateLimiter MEMORY_LOG_LIMITER = RateLimiter.create(1.0);

// Log at most 1 request per second to avoid too many logs when server is under heavy load
private static final RateLimiter REJECT_LOG_RATE_LIMITER = RateLimiter.create(1.0);

@Context
private jakarta.inject.Provider<HugeConfig> configProvider;
@Context
private jakarta.inject.Provider<WorkLoad> loadProvider;

public static boolean isWhiteAPI(ContainerRequestContext context) {
List<PathSegment> segments = context.getUriInfo().getPathSegments();
E.checkArgument(!segments.isEmpty(), "Invalid request uri '%s'",
context.getUriInfo().getPath());
String rootPath = segments.get(0).getPath();
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ 方法可见性变更:gcIfNeeded 从 private static 改为 protected 实例方法

原方法是 private static void gcIfNeeded(),现改为 protected boolean gcIfNeeded()。变更影响:

  1. 扩大可见性(private → protected),破坏了封装性
  2. 从静态方法变为实例方法
  3. 主要目的似乎是为了测试时 override

建议保持 private 可见性,如果是为了提高可测试性,可以:

  • 使用 package-private 可见性 + @VisibleForTesting 注解
  • 或者通过注入策略接口实现

同样的建议也适用于 allowRejectLog() 方法。

return WHITE_API_LIST.contains(rootPath);
}

/**
 * Requests a JVM garbage collection if the GC rate limiter grants a permit.
 *
 * <p>One permit is requested from {@code GC_RATE_LIMITER} via
 * {@code tryAcquire(1)}; when no permit is granted, no GC is requested.
 * This bounds how often this filter calls {@code System.gc()}.
 *
 * @return {@code true} if this call invoked {@code System.gc()},
 *         {@code false} if the rate limiter denied the permit
 */
protected boolean gcIfNeeded() {
if (GC_RATE_LIMITER.tryAcquire(1)) {
// System.gc() is only a request; the JVM decides whether/when to collect
System.gc();
return true;
}
return false;
}

/**
 * Decides whether a rejection warning may be logged right now.
 *
 * <p>Takes a permit from {@code REJECT_LOG_RATE_LIMITER} via
 * {@code tryAcquire()}, so each {@code true} result consumes one permit.
 *
 * @return {@code true} if a permit was acquired and the caller may log,
 *         {@code false} if the log should be suppressed
 */
protected boolean allowRejectLog() {
return REJECT_LOG_RATE_LIMITER.tryAcquire();
}

protected void logRejectWarning(String message, Object... args) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ logRejectWarning() 与直接调用 allowRejectLog() 的不一致

高负载路径使用封装的 logRejectWarning(),但低内存路径直接调用 allowRejectLog() + LOG.warn()

// 高负载路径
this.logRejectWarning("Rejected request due to high worker load...");

// 低内存路径
boolean shouldLog = this.allowRejectLog();
if (shouldLog) { LOG.warn(...); }

两条路径的日志方式不一致,降低了可读性。建议统一为同一种模式。另外 logRejectWarning() 在生产代码中只被调用了一次,是否真的需要抽取为独立方法值得考虑。

if (this.allowRejectLog()) {
LOG.warn(message, args);
}
}

@Override
public void filter(ContainerRequestContext context) {
if (LoadDetectFilter.isWhiteAPI(context)) {
Expand All @@ -70,7 +103,12 @@ public void filter(ContainerRequestContext context) {
int maxWorkerThreads = config.get(ServerOptions.MAX_WORKER_THREADS);
WorkLoad load = this.loadProvider.get();
// There will be a thread doesn't work, dedicated to statistics
if (load.incrementAndGet() >= maxWorkerThreads) {
int currentLoad = load.incrementAndGet();
if (currentLoad >= maxWorkerThreads) {
this.logRejectWarning("Rejected request due to high worker load, method={}, path={}, " +
"currentLoad={}, maxWorkerThreads={}",
context.getMethod(), context.getUriInfo().getPath(),
currentLoad, maxWorkerThreads);
throw new ServiceUnavailableException(String.format(
"The server is too busy to process the request, " +
"you can config %s to adjust it or try again later",
Expand All @@ -83,7 +121,20 @@ public void filter(ContainerRequestContext context) {
long presumableFreeMem = (Runtime.getRuntime().maxMemory() -
allocatedMem) / Bytes.MB;
if (presumableFreeMem < minFreeMemory) {
gcIfNeeded();
boolean shouldLog = this.allowRejectLog();
boolean gcTriggered = this.gcIfNeeded();
if (shouldLog) {
long allocatedMemAfterCheck = Runtime.getRuntime().totalMemory() -
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 recheckedFreeMem 在 gcTriggered=false 时是冗余信息

当 gcTriggered=false 时,recheckedFreeMem 与 presumableFreeMem 几乎完全相同(两次采样之间几乎没有时间差),日志中两个近似相同的值会造成困惑。

建议只在 gcTriggered=true 时才计算并记录 recheckedFreeMem,否则省略此字段。

Runtime.getRuntime().freeMemory();
long recheckedFreeMem = (Runtime.getRuntime().maxMemory() -
allocatedMemAfterCheck) / Bytes.MB;
LOG.warn("Rejected request due to low free memory, method={}, path={}, " +
"presumableFreeMemMB={}, recheckedFreeMemMB={}, gcTriggered={}, " +
"minFreeMemoryMB={}",
context.getMethod(), context.getUriInfo().getPath(),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

‼️ 内存拒绝路径中的逻辑问题:GC 后未重新检查内存

当前逻辑在 GC 后直接抛出异常,并没有重新检查内存是否已恢复到阈值以上。既然已经在这里做了 recheckedFreeMem 计算,建议更进一步:GC 后重新检查内存,如果已恢复则放行请求,减少不必要的 503。

当前行为:检测到内存不足 → 尝试 GC → 记录 GC 后的内存 → 仍然抛异常
建议行为:检测到内存不足 → 尝试 GC → 重新检查 → 如果恢复则放行

Suggested change
context.getMethod(), context.getUriInfo().getPath(),
boolean gcTriggered = this.gcIfNeeded();
if (gcTriggered) {
long allocatedMemAfterGc = Runtime.getRuntime().totalMemory() -
Runtime.getRuntime().freeMemory();
long freeMemAfterGc = (Runtime.getRuntime().maxMemory() -
allocatedMemAfterGc) / Bytes.MB;
if (freeMemAfterGc >= minFreeMemory) {
this.logRejectWarning(
"Low memory recovered after GC, method={}, path={}, " +
"beforeFreeMB={}, afterFreeMB={}",
context.getMethod(), context.getUriInfo().getPath(),
presumableFreeMem, freeMemAfterGc);
return;
}
}
this.logRejectWarning(
"Rejected request due to low free memory, method={}, path={}, " +
"presumableFreeMemMB={}, gcTriggered={}, minFreeMemoryMB={}",
context.getMethod(), context.getUriInfo().getPath(),
presumableFreeMem, gcTriggered, minFreeMemory);

presumableFreeMem, recheckedFreeMem, gcTriggered,
minFreeMemory);
}
throw new ServiceUnavailableException(String.format(
"The server available memory %s(MB) is below than " +
"threshold %s(MB) and can't process the request, " +
Expand All @@ -92,18 +143,4 @@ public void filter(ContainerRequestContext context) {
ServerOptions.MIN_FREE_MEMORY.name()));
}
}

/**
 * Tells whether the request targets a whitelisted API.
 *
 * <p>Only the first path segment of the request URI is compared against
 * {@code WHITE_API_LIST}.
 *
 * @param context the incoming request context
 * @return {@code true} if the first path segment is in {@code WHITE_API_LIST}
 */
public static boolean isWhiteAPI(ContainerRequestContext context) {
List<PathSegment> segments = context.getUriInfo().getPathSegments();
// Fails the argument check when the URI has no path segments
// NOTE(review): the exception type thrown by E.checkArgument is not visible here — confirm
E.checkArgument(!segments.isEmpty(), "Invalid request uri '%s'",
context.getUriInfo().getPath());
String rootPath = segments.get(0).getPath();
return WHITE_API_LIST.contains(rootPath);
}

/**
 * Requests a JVM garbage collection if {@code GC_RATE_LIMITER} grants one
 * permit via {@code tryAcquire(1)}; otherwise does nothing.
 */
private static void gcIfNeeded() {
if (GC_RATE_LIMITER.tryAcquire(1)) {
// System.gc() is only a request; the JVM decides whether/when to collect
System.gc();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.hugegraph.unit;

import org.apache.hugegraph.core.RoleElectionStateMachineTest;
import org.apache.hugegraph.unit.api.filter.LoadDetectFilterTest;
import org.apache.hugegraph.unit.api.filter.PathFilterTest;
import org.apache.hugegraph.unit.cache.CacheManagerTest;
import org.apache.hugegraph.unit.cache.CacheTest;
Expand Down Expand Up @@ -78,6 +79,7 @@
@RunWith(Suite.class)
@Suite.SuiteClasses({
/* api filter */
LoadDetectFilterTest.class,
PathFilterTest.class,

/* cache */
Expand Down
Loading
Loading