|
19 | 19 | import os |
20 | 20 | import shutil |
21 | 21 | import signal |
22 | | -import socket |
23 | 22 | import subprocess |
24 | 23 | import sys |
25 | 24 | import tempfile |
|
63 | 62 | write_local_file, |
64 | 63 | ) |
65 | 64 |
|
# Service ports come from the environment; each falls back to a fixed default
# when the corresponding variable is not set.
FD_API_PORT = int(os.environ.get("FD_API_PORT", 8188))
FD_ENGINE_QUEUE_PORT = int(os.environ.get("FD_ENGINE_QUEUE_PORT", 8133))
FD_METRICS_PORT = int(os.environ.get("FD_METRICS_PORT", 8233))
FD_CACHE_QUEUE_PORT = int(os.environ.get("FD_CACHE_QUEUE_PORT", 8333))

# Every port that gets cleaned before and after the tests run.
PORTS_TO_CLEAN = [
    FD_API_PORT,
    FD_ENGINE_QUEUE_PORT,
    FD_METRICS_PORT,
    FD_CACHE_QUEUE_PORT,
]
74 | | - |
75 | | - |
def is_port_open(host: str, port: int, timeout=1.0):
    """
    Report whether a TCP connection to ``host:port`` can be established.

    Args:
        host: Host name or address to connect to.
        port: TCP port to probe.
        timeout: Timeout handed to ``socket.create_connection``.

    Returns:
        True if the connection attempt succeeds, False if anything goes wrong.
    """
    try:
        probe = socket.create_connection((host, port), timeout)
        # Only reachability matters; release the socket straight away.
        probe.close()
    except Exception:
        return False
    return True
86 | | - |
87 | | - |
def _clean_cuda_process():
    """
    Run ``fuser -k /dev/nvidia*`` to kill processes holding the NVIDIA devices.

    Best effort: a timeout or a missing executable is silently ignored.
    NOTE: Do not call this function directly, use the `clean` function instead.
    """
    kill_command = "fuser -k /dev/nvidia*"
    tolerated_errors = (
        subprocess.TimeoutExpired,
        subprocess.CalledProcessError,
        FileNotFoundError,
    )
    try:
        # The shell is needed so the /dev/nvidia* glob is expanded.
        subprocess.run(kill_command, shell=True, timeout=5)
    except tolerated_errors:
        pass
97 | | - |
98 | | - |
def kill_process_on_port(port: int):
    """
    Kill processes that are using the given port.

    Three independent, best-effort methods are tried in order (``lsof``,
    ``netstat`` and ``fuser``) so that one missing or failing tool does not
    prevent the others from running. The current process and its parent are
    never signalled by the first two methods.

    Args:
        port: Port number whose owning processes should be killed.
    """
    # Never signal ourselves or the process that launched us.
    protected_pids = (os.getpid(), os.getppid())

    # Method 1: Use lsof to find processes
    try:
        output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
    except subprocess.CalledProcessError:
        # Non-zero exit (e.g. nothing found or lsof unavailable): nothing to do here.
        output = ""
    for line in output.splitlines():
        pid = int(line)
        if pid in protected_pids:
            print(f"Skip killing current process (pid={pid}) on port {port}")
            continue
        try:
            # First try SIGTERM for graceful shutdown
            os.kill(pid, signal.SIGTERM)
            time.sleep(1)
            # Then SIGKILL; raises ProcessLookupError if the process is already gone
            os.kill(pid, signal.SIGKILL)
            print(f"Killed process on port {port}, pid={pid}")
        except ProcessLookupError:
            pass  # Process already terminated
        except PermissionError:
            # A process we are not allowed to signal must not abort the whole
            # cleanup; report it and let the remaining methods run.
            print(f"No permission to kill process on port {port}, pid={pid}")

    # Method 2: Use netstat as backup
    try:
        # Require whitespace right after the port so that cleaning e.g. port 80
        # does not also select sockets bound to 8080 or 8000.
        cmd = (
            f"netstat -tulpn 2>/dev/null | grep -E ':{port}[[:space:]]' "
            "| awk '{print $7}' | cut -d'/' -f1"
        )
        output = subprocess.check_output(cmd, shell=True).decode().strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        output = ""
    for line in output.splitlines():
        # The PID column can be "-" or empty when the owner is not visible.
        if not line.isdigit():
            continue
        pid = int(line)
        if pid in protected_pids:
            continue
        try:
            os.kill(pid, signal.SIGKILL)
            print(f"Killed process (netstat) on port {port}, pid={pid}")
        except ProcessLookupError:
            pass
        except PermissionError:
            print(f"No permission to kill process on port {port}, pid={pid}")

    # Method 3: Use fuser if available
    try:
        subprocess.run(f"fuser -k {port}/tcp", shell=True, timeout=5)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError):
        pass
150 | | - |
151 | | - |
def clean_ports(ports=None):
    """
    Kill every process occupying the given ports.

    Args:
        ports: Ports to clean; ``PORTS_TO_CLEAN`` is used when this is None.
    """
    targets = PORTS_TO_CLEAN if ports is None else ports

    print(f"Cleaning ports: {targets}")
    for target in targets:
        kill_process_on_port(target)

    # Wait a moment, then make a second pass over any port that still
    # accepts local connections.
    time.sleep(2)
    for target in targets:
        if not is_port_open("127.0.0.1", target, timeout=0.1):
            continue
        print(f"Port {target} still in use, retrying cleanup...")
        kill_process_on_port(target)
        time.sleep(1)
170 | | - |
171 | | - |
def clean(ports=None):
    """
    Release resources used during testing.

    Always cleans the ports via ``clean_ports``; additionally kills processes
    using CUDA devices when the ``CLEAN_CUDA`` environment variable equals 1.

    Args:
        ports: Forwarded unchanged to ``clean_ports``.
    """
    clean_ports(ports)

    # CUDA cleanup is opt-in through the CLEAN_CUDA environment variable.
    # NOTE: It is dangerous to use this flag on development machines, as it may kill other processes
    cuda_flag = int(os.getenv("CLEAN_CUDA", "0"))
    if cuda_flag == 1:
        _clean_cuda_process()
# Put the directory above this file's directory on sys.path (once) so that
# imports resolved relative to it work.
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(current_dir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)
183 | 69 |
|
| 70 | +from e2e.utils.serving_utils import ( |
| 71 | + FD_CACHE_QUEUE_PORT, |
| 72 | + FD_ENGINE_QUEUE_PORT, |
| 73 | + clean_ports, |
| 74 | +) |
184 | 75 |
|
185 | 76 | INPUT_BATCH = """ |
186 | 77 | {"custom_id": "req-00001", "method": "POST", "url": "/v1/chat/completions", "body": {"messages": [{"role": "user", "content": "Can you write a short poem? (id=1)"}], "temperature": 0.7, "max_tokens": 200}} |
|
0 commit comments