Skip to content

Commit b2fc2c6

Browse files
authored
[XPU] fix zmq err catch (#7844)
* fix zmq err catch
* fix unit
* fix unit
1 parent 5348902 commit b2fc2c6

1 file changed

Lines changed: 19 additions & 3 deletions

File tree

fastdeploy/inter_communicator/zmq_server.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""
1616

1717
import os
18+
import pickle
1819
import threading
1920
import time
2021
import traceback
@@ -110,7 +111,11 @@ def recv_json(self, flags: int = 0):
110111
try:
111112
# receive from socket
112113
msg = self.socket.recv(flags=flags)
113-
data_dict = self.socket._deserialize(msg, lambda buf: jsonapi.loads(buf))
114+
try:
115+
data_dict = self.socket._deserialize(msg, lambda buf: jsonapi.loads(buf))
116+
except (UnicodeDecodeError, ValueError) as e:
117+
llm_logger.warning(f"recv_json decode failed, msg={msg}, err={e}")
118+
raise
114119

115120
# collect zmq recv metrics
116121
_zmq_metrics_stats.msg_bytes_recv_total += len(msg)
@@ -152,7 +157,11 @@ def recv_pyobj(self, flags: int = 0):
152157
_zmq_metrics_stats = ZMQMetricsStats()
153158
self._ensure_socket()
154159
data_bytes = self.socket.recv(flags=flags)
155-
envelope = ForkingPickler.loads(data_bytes)
160+
try:
161+
envelope = ForkingPickler.loads(data_bytes)
162+
except (UnicodeDecodeError, ValueError, pickle.UnpicklingError) as e:
163+
llm_logger.warning(f"recv_pyobj decode failed, msg={data_bytes}, err={e}")
164+
raise
156165
if isinstance(envelope, dict):
157166
if "__meta" in envelope and "send_ts" in envelope["__meta"]:
158167
_zmq_metrics_stats.msg_recv_total += 1
@@ -539,7 +548,12 @@ def recv_control_cmd(self):
539548
"""
540549
self._ensure_socket()
541550
try:
542-
client, _, task_data = self.socket.recv_multipart(flags=zmq.NOBLOCK)
551+
frames = self.socket.recv_multipart(flags=zmq.NOBLOCK)
552+
if len(frames) < 2:
553+
llm_logger.warning(f"recv_control_cmd: unexpected frame count {len(frames)}, dropping message")
554+
return None
555+
client = frames[0]
556+
task_data = frames[-1]
543557
task = msgpack.unpackb(task_data)
544558
task_id_str = task["task_id"]
545559
except zmq.Again:
@@ -577,6 +591,8 @@ def close(self):
577591
llm_logger.info("ZMQ server is closing connection...")
578592
try:
579593
if self.socket is not None and not self.socket.closed:
594+
if self.address:
595+
self.socket.unbind(self.address)
580596
self.socket.close()
581597
if not self.context.closed:
582598
self.context.term()

0 commit comments

Comments
 (0)