Skip to content

Commit d1ec8f0

Browse files
committed
[chore] update
1 parent fe64ad9 commit d1ec8f0

2 files changed

Lines changed: 7 additions & 4 deletions

File tree

fastdeploy/engine/common_engine.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -140,10 +140,6 @@ def __init__(self, cfg: FDConfig, start_queue=True, use_async_llm=False):
140140
self.cfg = cfg
141141
self.use_async_llm = use_async_llm
142142

143-
# 为每个 DP 设置独立的 PROMETHEUS_MULTIPROC_DIR,避免多 DP 的 Counter/Histogram 指标混淆
144-
if not envs.FD_ENABLE_MULTI_API_SERVER and self.cfg.parallel_config.data_parallel_size > 1:
145-
setup_dp_prometheus_dir(self.cfg.parallel_config.local_data_parallel_id)
146-
147143
if self.cfg.parallel_config.data_parallel_size > 1:
148144
self.llm_logger = get_logger(
149145
"fastdeploy", f"fastdeploy_dprank{self.cfg.parallel_config.local_data_parallel_id}.log"
@@ -2763,10 +2759,13 @@ def launch_components(self):
27632759
f"Engine is initialized successfully with {self.cfg.parallel_config.tensor_parallel_size}"
27642760
+ f" data parallel id {i}"
27652761
)
2762+
setup_dp_prometheus_dir(i)
27662763
self.dp_processed[-1].start()
27672764
while self.launched_expert_service_signal.value[i] == 0:
27682765
time.sleep(1)
27692766

2767+
setup_dp_prometheus_dir(0)
2768+
27702769
def check_worker_initialize_status(self):
27712770
"""
27722771
Check the initialization status of workers by stdout logging

fastdeploy/engine/engine.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
log_request_error,
5151
)
5252
from fastdeploy.metrics.metrics import main_process_metrics
53+
from fastdeploy.metrics.prometheus_multiprocess_setup import setup_dp_prometheus_dir
5354
from fastdeploy.platforms import current_platform
5455
from fastdeploy.utils import EngineError, console_logger, envs, llm_logger
5556

@@ -897,8 +898,11 @@ def launch_components(self):
897898
f"Engine is initialized successfully with {self.cfg.parallel_config.tensor_parallel_size}"
898899
+ f" data parallel id {i}"
899900
)
901+
setup_dp_prometheus_dir(i)
900902
self.dp_processed[-1].start()
901903

904+
setup_dp_prometheus_dir(0)
905+
902906
for i in range(
903907
1,
904908
self.cfg.parallel_config.data_parallel_size // self.cfg.nnode,

0 commit comments

Comments
 (0)