Skip to content

Commit aab9152

Browse files
committed
Optimize noisy kernel logs
1 parent 0d1b838 commit aab9152

2 files changed

Lines changed: 37 additions & 3 deletions

File tree

iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/scheduler/FixedRateFragInsStateTracker.java

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,11 @@ private void fetchStateAndUpdate() {
156156
if (metrics.reachMaxRetryCount()) {
157157
// if reach max retry count, we think that the DN is down, and FI in that node won't
158158
// exist
159+
logger.warn(
160+
"Failed to fetch state for FragmentInstance {} after {} retries, mark it as no such instance",
161+
instance.getId(),
162+
InstanceStateMetrics.MAX_STATE_FETCH_RETRY_COUNT,
163+
e);
159164
FragmentInstanceInfo instanceInfo = new FragmentInstanceInfo(NO_SUCH_INSTANCE);
160165
instanceInfo.setMessage(
161166
String.format(
@@ -165,7 +170,12 @@ private void fetchStateAndUpdate() {
165170
} else {
166171
// if not reaching max retry count, add retry count, and wait for next fetching schedule
167172
metrics.addRetryCount();
168-
logger.warn(DataNodeQueryMessages.ERROR_HAPPENED_WHILE_FETCHING_QUERY_STATE, e);
173+
logger.debug(
174+
"Failed to fetch state for FragmentInstance {}, retry {}/{}",
175+
instance.getId(),
176+
metrics.retryCount,
177+
InstanceStateMetrics.MAX_STATE_FETCH_RETRY_COUNT,
178+
e);
169179
}
170180
}
171181
}

iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/schedule/CompactionScheduler.java

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747
import java.util.Collections;
4848
import java.util.List;
4949
import java.util.concurrent.Phaser;
50+
import java.util.concurrent.TimeUnit;
51+
import java.util.concurrent.atomic.AtomicLong;
5052
import java.util.concurrent.locks.ReadWriteLock;
5153
import java.util.concurrent.locks.ReentrantReadWriteLock;
5254
import java.util.stream.Collectors;
@@ -65,6 +67,9 @@ public class CompactionScheduler {
6567
private static final Logger LOGGER =
6668
LoggerFactory.getLogger(IoTDBConstant.COMPACTION_LOGGER_NAME);
6769
private static final IoTDBConfig config = IoTDBDescriptor.getInstance().getConfig();
70+
private static final long DISK_SPACE_CHECK_FAIL_LOG_INTERVAL_MS = TimeUnit.MINUTES.toMillis(1);
71+
private static final AtomicLong LAST_DISK_SPACE_CHECK_FAIL_LOG_TIME = new AtomicLong(0);
72+
private static final AtomicLong SUPPRESSED_DISK_SPACE_CHECK_FAIL_LOG_COUNT = new AtomicLong(0);
6873

6974
private CompactionScheduler() {}
7075

@@ -207,13 +212,32 @@ private static boolean canAddTaskToWaitingQueue(AbstractCompactionTask task)
207212
}
208213
// check disk space
209214
if (!task.isDiskSpaceCheckPassed()) {
210-
LOGGER.info(
211-
"Compaction task start check failed because disk free ratio is less than disk_space_warning_threshold");
215+
logDiskSpaceCheckFailure(task);
212216
return false;
213217
}
214218
return true;
215219
}
216220

221+
private static void logDiskSpaceCheckFailure(AbstractCompactionTask task) {
222+
long now = System.currentTimeMillis();
223+
long lastLogTime = LAST_DISK_SPACE_CHECK_FAIL_LOG_TIME.get();
224+
if (now - lastLogTime >= DISK_SPACE_CHECK_FAIL_LOG_INTERVAL_MS
225+
&& LAST_DISK_SPACE_CHECK_FAIL_LOG_TIME.compareAndSet(lastLogTime, now)) {
226+
long suppressedCount = SUPPRESSED_DISK_SPACE_CHECK_FAIL_LOG_COUNT.getAndSet(0);
227+
LOGGER.info(
228+
"Skip compaction task because disk free ratio is less than disk_space_warning_threshold, "
229+
+ "taskType={}, storageGroup={}, dataRegion={}, timePartition={}, processedFileNum={}, suppressedSimilarLogs={}",
230+
task.getCompactionTaskType(),
231+
task.getStorageGroupName(),
232+
task.getDataRegionId(),
233+
task.getTimePartition(),
234+
task.getProcessedFileNum(),
235+
suppressedCount);
236+
} else {
237+
SUPPRESSED_DISK_SPACE_CHECK_FAIL_LOG_COUNT.incrementAndGet();
238+
}
239+
}
240+
217241
public static int scheduleInsertionCompaction(
218242
TsFileManager tsFileManager, long timePartition, CompactionScheduleContext context)
219243
throws InterruptedException {

0 commit comments

Comments
 (0)