Skip to content

Commit 71dffb8

Browse files
committed
fix
1 parent b0ea2a9 commit 71dffb8

8 files changed

Lines changed: 20 additions & 3 deletions

File tree

lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_kv_move_manager.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import inspect
22
import pickle
3+
import setproctitle
34
import torch.multiprocessing as mp
45
import time
56
from typing import List, Dict, Optional, Tuple, Union, Callable
@@ -10,6 +11,7 @@
1011
from ..trans_process_obj import KVTransProcess
1112
from ..base_kv_move_manager import BaseKVMoveManager
1213
from lightllm.utils.error_utils import log_exception
14+
from lightllm.utils.envs_utils import get_unique_server_name
1315

1416
logger = init_logger(__name__)
1517

@@ -29,6 +31,7 @@ def _init_env(args, info_queue: mp.Queue, event: mp.Event):
2931

3032
# 注册graceful 退出的处理
3133
graceful_registry(inspect.currentframe().f_code.co_name)
34+
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::nixl_decode_kv_move_manager")
3235

3336
from .up_status import start_up_kv_status_process
3437

lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_trans_process.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import time
33
import inspect
44
import threading
5+
import setproctitle
56
import torch.multiprocessing as mp
67
import collections
78
import queue
@@ -22,6 +23,7 @@
2223
from lightllm.server.core.objs import StartArgs
2324
from ..nixl_kv_transporter import NixlKVTransporter
2425
from lightllm.utils.error_utils import log_exception
26+
from lightllm.utils.envs_utils import get_unique_server_name
2527

2628
logger = init_logger(__name__)
2729

@@ -48,6 +50,7 @@ def _init_env(
4850
up_status_in_queue: Optional[mp.SimpleQueue],
4951
):
5052
torch.backends.cudnn.enabled = False
53+
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::nixl_decode_trans:Device{device_id}")
5154

5255
try:
5356
torch.cuda.set_device(device_id)

lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/up_status.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import websockets
66
import inspect
77
import pickle
8+
import setproctitle
89

910
from typing import Dict, Union
1011
from dataclasses import asdict
@@ -13,6 +14,7 @@
1314
from lightllm.utils.graceful_utils import graceful_registry
1415
from lightllm.server.pd_io_struct import PD_Master_Obj
1516
import torch.multiprocessing as mp
17+
from lightllm.utils.envs_utils import get_unique_server_name
1618

1719
logger = init_logger(__name__)
1820

@@ -108,6 +110,7 @@ async def up_kv_status_task(self, pd_master_obj: PD_Master_Obj):
108110

109111
def _init_env(args, task_in_queue: mp.SimpleQueue):
110112
graceful_registry(inspect.currentframe().f_code.co_name)
113+
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::nixl_up_kv_status")
111114
up_kv_manager = UpStatusManager(args, task_in_queue)
112115
logger.info(f"up kv manager {str(up_kv_manager)} start ok")
113116
while True:

lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/prefill_kv_move_manager.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import inspect
2+
import setproctitle
23
import torch.multiprocessing as mp
34
import time
45
from typing import List, Dict, Union, Callable
@@ -9,6 +10,7 @@
910
from ..trans_process_obj import KVTransProcess
1011
from ..base_kv_move_manager import BaseKVMoveManager
1112
from lightllm.utils.error_utils import log_exception
13+
from lightllm.utils.envs_utils import get_unique_server_name
1214

1315
logger = init_logger(__name__)
1416

@@ -28,6 +30,7 @@ def _init_env(args, info_queue: mp.Queue, event: mp.Event):
2830

2931
# 注册graceful 退出的处理
3032
graceful_registry(inspect.currentframe().f_code.co_name)
33+
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::nixl_prefill_kv_move_manager")
3134

3235
from .prefill_trans_process import start_prefill_trans_process
3336

lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/prefill_trans_process.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import time
33
import inspect
44
import threading
5+
import setproctitle
56
import torch.multiprocessing as mp
67
import collections
78
import queue
@@ -15,6 +16,7 @@
1516
from lightllm.server.core.objs import StartArgs
1617
from ..nixl_kv_transporter import NixlKVTransporter
1718
from lightllm.utils.error_utils import log_exception
19+
from lightllm.utils.envs_utils import get_unique_server_name
1820

1921

2022
logger = init_logger(__name__)
@@ -41,6 +43,7 @@ def _init_env(
4143
task_out_queue: mp.Queue,
4244
):
4345
torch.backends.cudnn.enabled = False
46+
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::nixl_prefill_trans:Device{device_id}")
4447

4548
try:
4649
torch.cuda.set_device(device_id)

lightllm/server/visualserver/model_infer/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@
44
import uuid
55
import os
66
import multiprocessing
7+
import setproctitle
78
from lightllm.utils.retry_utils import retry
89
from rpyc.utils.factory import unix_connect
910
from rpyc.utils.classic import obtain
1011
from rpyc.utils.server import ThreadedServer
1112
from lightllm.utils.graceful_utils import graceful_registry
12-
from lightllm.utils.envs_utils import get_env_start_args
13+
from lightllm.utils.envs_utils import get_env_start_args, get_unique_server_name
1314
from .model_rpc_client import VisualModelRpcClient
1415
from .model_rpc import VisualModelRpcServer
1516
from ..objs import rpyc_config
@@ -18,6 +19,7 @@
1819
def _init_env(socket_path: str, success_event):
1920
# 注册graceful 退出的处理
2021
graceful_registry(inspect.currentframe().f_code.co_name)
22+
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::visual_model_infer")
2123

2224
import lightllm.utils.rpyc_fix_utils as _
2325

lightllm/server/visualserver/proxy_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def start_visual_process(args, pipe_writer):
211211

212212
# 注册graceful 退出的处理
213213
graceful_registry(inspect.currentframe().f_code.co_name)
214-
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::visual_server")
214+
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::visual_proxy_server")
215215
start_parent_check_thread()
216216
try:
217217
visualserver = ProxyVisualManager(args=args)

lightllm/server/visualserver/visual_only_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def start_visual_process(args: StartArgs, pipe_writer):
177177

178178
# 注册graceful 退出的处理
179179
graceful_registry(inspect.currentframe().f_code.co_name)
180-
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::visual_server")
180+
setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::visual_only_server")
181181
start_parent_check_thread()
182182

183183
try:

0 commit comments

Comments
 (0)