|
| 1 | +""" |
| 2 | +Connection handler: protocol, mmap I/O, request loop. |
| 3 | +
|
| 4 | +Implements the binary socket protocol matching the Rust wasm-udf-server: |
| 5 | +handshake, control signal dispatch, and UDF request loop with mmap I/O. |
| 6 | +""" |
| 7 | +from __future__ import annotations |
| 8 | + |
| 9 | +import array |
| 10 | +import logging |
| 11 | +import mmap |
| 12 | +import os |
| 13 | +import select |
| 14 | +import socket |
| 15 | +import struct |
| 16 | +import threading |
| 17 | +import traceback |
| 18 | +from typing import TYPE_CHECKING |
| 19 | + |
| 20 | +from .control import dispatch_control_signal |
| 21 | +from .registry import call_function |
| 22 | + |
| 23 | +if TYPE_CHECKING: |
| 24 | + from .server import SharedRegistry |
| 25 | + |
| 26 | +logger = logging.getLogger('collocated.connection') |
| 27 | + |
| 28 | +# Protocol constants |
| 29 | +PROTOCOL_VERSION = 1 |
| 30 | +STATUS_OK = 200 |
| 31 | +STATUS_BAD_REQUEST = 400 |
| 32 | +STATUS_ERROR = 500 |
| 33 | + |
| 34 | +# Minimum output mmap size to avoid repeated ftruncate |
| 35 | +_MIN_OUTPUT_SIZE = 128 * 1024 |
| 36 | + |
| 37 | + |
| 38 | +def handle_connection( |
| 39 | + conn: socket.socket, |
| 40 | + shared_registry: SharedRegistry, |
| 41 | + shutdown_event: threading.Event, |
| 42 | +) -> None: |
| 43 | + """Handle a single client connection (runs in a thread pool worker).""" |
| 44 | + try: |
| 45 | + _handle_connection_inner(conn, shared_registry, shutdown_event) |
| 46 | + except Exception: |
| 47 | + logger.error(f'Connection error:\n{traceback.format_exc()}') |
| 48 | + finally: |
| 49 | + try: |
| 50 | + conn.close() |
| 51 | + except OSError: |
| 52 | + pass |
| 53 | + |
| 54 | + |
| 55 | +def _handle_connection_inner( |
| 56 | + conn: socket.socket, |
| 57 | + shared_registry: SharedRegistry, |
| 58 | + shutdown_event: threading.Event, |
| 59 | +) -> None: |
| 60 | + """Inner connection handler (may raise).""" |
| 61 | + # --- Handshake --- |
| 62 | + # Receive 16 bytes: [version: u64 LE][namelen: u64 LE] |
| 63 | + header = _recv_exact(conn, 16) |
| 64 | + if header is None: |
| 65 | + return |
| 66 | + version, namelen = struct.unpack('<QQ', header) |
| 67 | + |
| 68 | + if version != PROTOCOL_VERSION: |
| 69 | + logger.warning(f'Unsupported protocol version: {version}') |
| 70 | + return |
| 71 | + |
| 72 | + # Receive function name + 2 FDs via SCM_RIGHTS |
| 73 | + fd_model = array.array('i', [0, 0]) |
| 74 | + msg, ancdata, flags, addr = conn.recvmsg( |
| 75 | + namelen, |
| 76 | + socket.CMSG_LEN(2 * fd_model.itemsize), |
| 77 | + ) |
| 78 | + if len(ancdata) != 1: |
| 79 | + logger.warning(f'Expected 1 ancdata, got {len(ancdata)}') |
| 80 | + return |
| 81 | + |
| 82 | + function_name = msg.decode('utf8') |
| 83 | + input_fd, output_fd = struct.unpack('<ii', ancdata[0][2]) |
| 84 | + |
| 85 | + # --- Control signal path --- |
| 86 | + if function_name.startswith('@@'): |
| 87 | + logger.info(f"Received control signal '{function_name}'") |
| 88 | + _handle_control_signal( |
| 89 | + conn, function_name, input_fd, output_fd, shared_registry, |
| 90 | + ) |
| 91 | + return |
| 92 | + |
| 93 | + # --- UDF request loop --- |
| 94 | + logger.info(f"Received request for function '{function_name}'") |
| 95 | + _handle_udf_loop( |
| 96 | + conn, function_name, input_fd, output_fd, |
| 97 | + shared_registry, shutdown_event, |
| 98 | + ) |
| 99 | + |
| 100 | + |
| 101 | +def _handle_control_signal( |
| 102 | + conn: socket.socket, |
| 103 | + signal_name: str, |
| 104 | + input_fd: int, |
| 105 | + output_fd: int, |
| 106 | + shared_registry: SharedRegistry, |
| 107 | +) -> None: |
| 108 | + """Handle a @@-prefixed control signal (one-shot request-response).""" |
| 109 | + try: |
| 110 | + # Read 8-byte request length |
| 111 | + len_buf = _recv_exact(conn, 8) |
| 112 | + if len_buf is None: |
| 113 | + return |
| 114 | + length = struct.unpack('<Q', len_buf)[0] |
| 115 | + |
| 116 | + # Read input data from mmap (if any) |
| 117 | + request_data = b'' |
| 118 | + if length > 0: |
| 119 | + mem = mmap.mmap( |
| 120 | + input_fd, length, mmap.MAP_SHARED, mmap.PROT_READ, |
| 121 | + ) |
| 122 | + try: |
| 123 | + request_data = mem[:length] |
| 124 | + finally: |
| 125 | + mem.close() |
| 126 | + |
| 127 | + # Dispatch |
| 128 | + result = dispatch_control_signal( |
| 129 | + signal_name, request_data, shared_registry, |
| 130 | + ) |
| 131 | + |
| 132 | + if result.ok: |
| 133 | + # Write response to output mmap |
| 134 | + response_bytes = result.data.encode('utf8') |
| 135 | + response_size = len(response_bytes) |
| 136 | + os.ftruncate(output_fd, max(_MIN_OUTPUT_SIZE, response_size)) |
| 137 | + os.lseek(output_fd, 0, os.SEEK_SET) |
| 138 | + os.write(output_fd, response_bytes) |
| 139 | + |
| 140 | + # Send [status=200, size] |
| 141 | + conn.sendall(struct.pack('<QQ', STATUS_OK, response_size)) |
| 142 | + logger.debug( |
| 143 | + f"Control signal '{signal_name}' succeeded, " |
| 144 | + f'{response_size} bytes', |
| 145 | + ) |
| 146 | + else: |
| 147 | + # Send [status=400, len, message] |
| 148 | + err_bytes = result.data.encode('utf8') |
| 149 | + conn.sendall( |
| 150 | + struct.pack( |
| 151 | + f'<QQ{len(err_bytes)}s', |
| 152 | + STATUS_BAD_REQUEST, len(err_bytes), err_bytes, |
| 153 | + ), |
| 154 | + ) |
| 155 | + logger.warning( |
| 156 | + f"Control signal '{signal_name}' failed: {result.data}", |
| 157 | + ) |
| 158 | + finally: |
| 159 | + os.close(input_fd) |
| 160 | + os.close(output_fd) |
| 161 | + |
| 162 | + |
| 163 | +def _handle_udf_loop( |
| 164 | + conn: socket.socket, |
| 165 | + function_name: str, |
| 166 | + input_fd: int, |
| 167 | + output_fd: int, |
| 168 | + shared_registry: SharedRegistry, |
| 169 | + shutdown_event: threading.Event, |
| 170 | +) -> None: |
| 171 | + """Handle the UDF request loop for a single function.""" |
| 172 | + # Track output mmap size to avoid repeated ftruncate |
| 173 | + current_output_size = 0 |
| 174 | + |
| 175 | + try: |
| 176 | + # Get thread-local registry |
| 177 | + registry = shared_registry.get_thread_local_registry() |
| 178 | + |
| 179 | + while not shutdown_event.is_set(): |
| 180 | + # Select-based recv with 100ms timeout for shutdown checks |
| 181 | + readable, _, _ = select.select([conn], [], [], 0.1) |
| 182 | + if not readable: |
| 183 | + continue |
| 184 | + |
| 185 | + # Read 8-byte request length |
| 186 | + len_buf = _recv_exact(conn, 8) |
| 187 | + if len_buf is None: |
| 188 | + break |
| 189 | + length = struct.unpack('<Q', len_buf)[0] |
| 190 | + if length == 0: |
| 191 | + break |
| 192 | + |
| 193 | + # Read input from mmap |
| 194 | + mem = mmap.mmap( |
| 195 | + input_fd, length, mmap.MAP_SHARED, mmap.PROT_READ, |
| 196 | + ) |
| 197 | + try: |
| 198 | + input_data = bytes(mem[:length]) |
| 199 | + finally: |
| 200 | + mem.close() |
| 201 | + |
| 202 | + # Refresh registry if generation changed |
| 203 | + registry = shared_registry.get_thread_local_registry() |
| 204 | + |
| 205 | + # Call function |
| 206 | + try: |
| 207 | + output_data = call_function(registry, function_name, input_data) |
| 208 | + |
| 209 | + # Write result to output mmap |
| 210 | + response_size = len(output_data) |
| 211 | + needed = max(_MIN_OUTPUT_SIZE, response_size) |
| 212 | + if needed > current_output_size: |
| 213 | + os.ftruncate(output_fd, needed) |
| 214 | + current_output_size = needed |
| 215 | + os.lseek(output_fd, 0, os.SEEK_SET) |
| 216 | + os.write(output_fd, output_data) |
| 217 | + |
| 218 | + # Send [status=200, size] |
| 219 | + conn.sendall(struct.pack('<QQ', STATUS_OK, response_size)) |
| 220 | + |
| 221 | + except Exception as e: |
| 222 | + error_msg = ( |
| 223 | + f"error in function '{function_name}': {e}" |
| 224 | + ) |
| 225 | + logger.error(error_msg) |
| 226 | + for line in traceback.format_exc().splitlines(): |
| 227 | + logger.error(line) |
| 228 | + err_bytes = error_msg.encode('utf8') |
| 229 | + conn.sendall( |
| 230 | + struct.pack( |
| 231 | + f'<QQ{len(err_bytes)}s', |
| 232 | + STATUS_ERROR, len(err_bytes), err_bytes, |
| 233 | + ), |
| 234 | + ) |
| 235 | + break |
| 236 | + |
| 237 | + finally: |
| 238 | + os.close(input_fd) |
| 239 | + os.close(output_fd) |
| 240 | + |
| 241 | + |
| 242 | +def _recv_exact(sock: socket.socket, n: int) -> bytes | None: |
| 243 | + """Receive exactly n bytes, or return None on EOF.""" |
| 244 | + buf = bytearray() |
| 245 | + while len(buf) < n: |
| 246 | + chunk = sock.recv(n - len(buf)) |
| 247 | + if not chunk: |
| 248 | + return None |
| 249 | + buf.extend(chunk) |
| 250 | + return bytes(buf) |
0 commit comments