Skip to content

Commit 1891f71

Browse files
authored
[fix](fe) add --drop_backends param to start_fe.sh (#63306)
This PR adds a --drop_backends startup flag for Doris FE: bin/start_fe.sh accepts the flag and forwards it to the FE argument parser, and the FE drops all backends from cluster metadata when it becomes master.
1 parent 2d9ec45 commit 1891f71

5 files changed

Lines changed: 39 additions & 5 deletions

File tree

bin/start_fe.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ OPTS="$(getopt \
3636
-l 'recovery_journal_id:' \
3737
-l 'console' \
3838
-l 'cluster_snapshot:' \
39+
-l 'drop_backends' \
3940
-- "$@")"
4041

4142
eval set -- "${OPTS}"
@@ -50,6 +51,7 @@ declare -a HELPER_ARGS=()
5051
declare -a METADATA_FAILURE_RECOVERY_ARGS=()
5152
declare -a RECOVERY_JOURNAL_ID_ARGS=()
5253
declare -a CLUSTER_SNAPSHOT_ARGS=()
54+
declare -a DROP_BACKENDS_ARGS=()
5355
while true; do
5456
case "$1" in
5557
--daemon)
@@ -85,6 +87,10 @@ while true; do
8587
CLUSTER_SNAPSHOT_ARGS=("--cluster_snapshot" "$2")
8688
shift 2
8789
;;
90+
--drop_backends)
91+
DROP_BACKENDS_ARGS=("--drop_backends")
92+
shift
93+
;;
8894
--)
8995
shift
9096
break
@@ -93,7 +99,7 @@ while true; do
9399
echo "Internal error"
94100
exit 1
95101
;;
96-
esac
102+
esac
97103
done
98104

99105
DORIS_HOME="$(
@@ -433,12 +439,12 @@ if [[ "${IMAGE_TOOL}" -eq 1 ]]; then
433439
echo "Internal error, USE IMAGE_TOOL like: ./start_fe.sh --image image_path"
434440
fi
435441
elif [[ "${RUN_DAEMON}" -eq 1 ]]; then
436-
nohup ${LIMIT:+${LIMIT}} "${JAVA}" ${final_java_opt:+${final_java_opt}} -XX:-OmitStackTraceInFastThrow -XX:OnOutOfMemoryError="kill -9 %p" ${coverage_opt:+${coverage_opt}} org.apache.doris.DorisFE "${HELPER_ARGS[@]}" "${METADATA_FAILURE_RECOVERY_ARGS[@]}" "${RECOVERY_JOURNAL_ID_ARGS[@]}" "${CLUSTER_SNAPSHOT_ARGS[@]}" "$@" >>"${STDOUT_LOGGER}" 2>&1 </dev/null &
442+
nohup ${LIMIT:+${LIMIT}} "${JAVA}" ${final_java_opt:+${final_java_opt}} -XX:-OmitStackTraceInFastThrow -XX:OnOutOfMemoryError="kill -9 %p" ${coverage_opt:+${coverage_opt}} org.apache.doris.DorisFE "${HELPER_ARGS[@]}" "${METADATA_FAILURE_RECOVERY_ARGS[@]}" "${RECOVERY_JOURNAL_ID_ARGS[@]}" "${CLUSTER_SNAPSHOT_ARGS[@]}" "${DROP_BACKENDS_ARGS[@]}" "$@" >>"${STDOUT_LOGGER}" 2>&1 </dev/null &
437443
elif [[ "${RUN_CONSOLE}" -eq 1 ]]; then
438444
export DORIS_LOG_TO_STDERR=1
439-
${LIMIT:+${LIMIT}} "${JAVA}" ${final_java_opt:+${final_java_opt}} -XX:-OmitStackTraceInFastThrow -XX:OnOutOfMemoryError="kill -9 %p" ${coverage_opt:+${coverage_opt}} org.apache.doris.DorisFE "${HELPER_ARGS[@]}" ${OPT_VERSION:+${OPT_VERSION}} "${METADATA_FAILURE_RECOVERY_ARGS[@]}" "${RECOVERY_JOURNAL_ID_ARGS[@]}" "${CLUSTER_SNAPSHOT_ARGS[@]}" "$@" >>"${STDOUT_LOGGER}" </dev/null
445+
${LIMIT:+${LIMIT}} "${JAVA}" ${final_java_opt:+${final_java_opt}} -XX:-OmitStackTraceInFastThrow -XX:OnOutOfMemoryError="kill -9 %p" ${coverage_opt:+${coverage_opt}} org.apache.doris.DorisFE "${HELPER_ARGS[@]}" ${OPT_VERSION:+${OPT_VERSION}} "${METADATA_FAILURE_RECOVERY_ARGS[@]}" "${RECOVERY_JOURNAL_ID_ARGS[@]}" "${CLUSTER_SNAPSHOT_ARGS[@]}" "${DROP_BACKENDS_ARGS[@]}" "$@" >>"${STDOUT_LOGGER}" </dev/null
440446
else
441-
${LIMIT:+${LIMIT}} "${JAVA}" ${final_java_opt:+${final_java_opt}} -XX:-OmitStackTraceInFastThrow -XX:OnOutOfMemoryError="kill -9 %p" ${coverage_opt:+${coverage_opt}} org.apache.doris.DorisFE "${HELPER_ARGS[@]}" ${OPT_VERSION:+${OPT_VERSION}} "${METADATA_FAILURE_RECOVERY_ARGS[@]}" "${RECOVERY_JOURNAL_ID_ARGS[@]}" "${CLUSTER_SNAPSHOT_ARGS[@]}" "$@" >>"${STDOUT_LOGGER}" 2>&1 </dev/null
447+
${LIMIT:+${LIMIT}} "${JAVA}" ${final_java_opt:+${final_java_opt}} -XX:-OmitStackTraceInFastThrow -XX:OnOutOfMemoryError="kill -9 %p" ${coverage_opt:+${coverage_opt}} org.apache.doris.DorisFE "${HELPER_ARGS[@]}" ${OPT_VERSION:+${OPT_VERSION}} "${METADATA_FAILURE_RECOVERY_ARGS[@]}" "${RECOVERY_JOURNAL_ID_ARGS[@]}" "${CLUSTER_SNAPSHOT_ARGS[@]}" "${DROP_BACKENDS_ARGS[@]}" "$@" >>"${STDOUT_LOGGER}" 2>&1 </dev/null
442448
fi
443449

444450
if [[ "${OPT_VERSION}" != "" ]]; then

docker/runtime/doris-compose/resource/init_fe.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ start_cloud_fe() {
188188
exit $MV_RES
189189
fi
190190
health_log "Recovery script executed and renamed to ${RECOVERY_SCRIPT}.bak"
191-
RECOVERY_ARGS="--metadata_failure_recovery --recovery_journal_id $JOURNAL_ID"
191+
RECOVERY_ARGS="--metadata_failure_recovery --recovery_journal_id $JOURNAL_ID --drop_backends"
192192
fi
193193

194194
if [ -f "$REGISTER_FILE" ] || [ -n "${CLUSTER_SNAPSHOT_FILE}" ] || [ -n "$RECOVERY_ARGS" ]; then

fe/fe-core/src/main/java/org/apache/doris/DorisFE.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,8 @@ private static CommandLineOptions parseArgs(String[] args) {
363363
.desc("Specify the recovery truncate journal id, and journals greater than this id will be removed")
364364
.build());
365365
options.addOption("c", "cluster_snapshot", true, "Specify the cluster snapshot json file");
366+
options.addOption(Option.builder().longOpt(FeConstants.DROP_BACKENDS_KEY)
367+
.desc("When this FE becomes MASTER, drop all backends from cluster metadata (destructive)").build());
366368

367369
CommandLine cmd = null;
368370
try {
@@ -407,6 +409,9 @@ private static CommandLineOptions parseArgs(String[] args) {
407409
}
408410
System.setProperty(FeConstants.RECOVERY_JOURNAL_ID_KEY, recoveryJournalId.trim());
409411
}
412+
if (cmd.hasOption(FeConstants.DROP_BACKENDS_KEY)) {
413+
System.setProperty(FeConstants.DROP_BACKENDS_KEY, "true");
414+
}
410415
if (cmd.hasOption('b') || cmd.hasOption("bdb")) {
411416
if (cmd.hasOption('l') || cmd.hasOption("listdb")) {
412417
// list bdb je databases

fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.apache.doris.clone.TabletChecker;
5656
import org.apache.doris.clone.TabletScheduler;
5757
import org.apache.doris.clone.TabletSchedulerStat;
58+
import org.apache.doris.cloud.system.CloudSystemInfoService;
5859
import org.apache.doris.common.AnalysisException;
5960
import org.apache.doris.common.Config;
6061
import org.apache.doris.common.ConfigBase;
@@ -272,6 +273,7 @@
272273
import org.apache.doris.statistics.StatisticsJobAppender;
273274
import org.apache.doris.statistics.StatisticsMetricCollector;
274275
import org.apache.doris.statistics.query.QueryStats;
276+
import org.apache.doris.system.Backend;
275277
import org.apache.doris.system.Frontend;
276278
import org.apache.doris.system.HeartbeatMgr;
277279
import org.apache.doris.system.SystemInfoService;
@@ -1826,6 +1828,26 @@ private void transferToMaster() {
18261828
editLog.logMasterInfo(masterInfo);
18271829
LOG.info("logMasterInfo:{}", masterInfo);
18281830

1831+
if (Boolean.getBoolean(FeConstants.DROP_BACKENDS_KEY)) {
1832+
LOG.info("drop_backends is set, dropping all backends...");
1833+
try {
1834+
SystemInfoService systemInfoService = Env.getCurrentSystemInfo();
1835+
List<Backend> bes = systemInfoService.getAllClusterBackendsNoException().values()
1836+
.stream().collect(Collectors.toList());
1837+
if (Config.isNotCloudMode()) {
1838+
for (Backend be : bes) {
1839+
systemInfoService.dropBackend(be.getHost(), be.getHeartbeatPort());
1840+
}
1841+
} else {
1842+
((CloudSystemInfoService) systemInfoService).updateCloudBackends(Collections.emptyList(), bes);
1843+
}
1844+
} catch (Exception e) {
1845+
LOG.warn("failed to drop backends", e);
1846+
}
1847+
System.clearProperty(FeConstants.DROP_BACKENDS_KEY);
1848+
LOG.info("finished dropping all backends");
1849+
}
1850+
18291851
// for master, the 'isReady' is set behind.
18301852
// but we are sure that all metadata is replayed if we get here.
18311853
// so no need to check 'isReady' flag in this method

fe/fe-core/src/main/java/org/apache/doris/common/FeConstants.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,5 @@ public class FeConstants {
6868

6969
public static String METADATA_FAILURE_RECOVERY_KEY = "metadata_failure_recovery";
7070
public static String RECOVERY_JOURNAL_ID_KEY = "recovery_journal_id";
71+
public static String DROP_BACKENDS_KEY = "drop_backends";
7172
}

0 commit comments

Comments
 (0)