@@ -242,13 +242,14 @@ class MhrvVpnService : VpnService() {
242242 tun = parcelFd
243243
244244 // 3) Start tun2proxy on a worker thread. It blocks until stop() or
245- // shutdown. We detach the fd so ownership transfers cleanly; the
246- // ParcelFileDescriptor (`tun`) still holds a reference, so closing
247- // it at teardown reliably tears down the TUN even if tun2proxy
248- // doesn't cleanly exit.
245+ // shutdown. We detach the fd so ownership transfers cleanly to
246+ // tun2proxy (closeFdOnDrop = true closes it on return from run()).
247+ // The ParcelFileDescriptor (`tun`) we keep is post-detach — its
248+ // own close() is a no-op for the underlying fd, so the worker is
249+ // the sole owner once it's running.
249250 val detachedFd = parcelFd.detachFd()
250251 tun2proxyRunning.set(true )
251- tun2proxyThread = Thread ({
252+ val worker = Thread ({
252253 try {
253254 val rc = Tun2proxy .run (
254255 " socks5://127.0.0.1:$socks5Port " ,
@@ -264,7 +265,29 @@ class MhrvVpnService : VpnService() {
264265 } finally {
265266 tun2proxyRunning.set(false )
266267 }
267- }, " tun2proxy" ).apply { start() }
268+ }, " tun2proxy" )
269+ try {
270+ worker.start()
271+ tun2proxyThread = worker
272+ } catch (t: Throwable ) {
273+ // Thread.start can throw OutOfMemoryError under extreme memory
274+ // pressure. The fd we just detached has no owner — without an
275+ // explicit close it leaks for the life of the process. Adopt
276+ // it into a fresh ParcelFileDescriptor purely so we can call
277+ // close() on it.
278+ Log .e(TAG , " tun2proxy thread start failed: ${t.message} " , t)
279+ tun2proxyRunning.set(false )
280+ try {
281+ ParcelFileDescriptor .adoptFd(detachedFd).close()
282+ } catch (closeErr: Throwable ) {
283+ Log .w(TAG , " adoptFd($detachedFd ).close failed: ${closeErr.message} " )
284+ }
285+ Native .stopProxy(proxyHandle)
286+ proxyHandle = 0L
287+ try { stopForeground(STOP_FOREGROUND_REMOVE ) } catch (_: Throwable ) {}
288+ stopSelf()
289+ return
290+ }
268291
269292 // (startForeground was already called at the top of this method
270293 // to satisfy Android 8+'s foreground-service contract — see the
@@ -291,12 +314,23 @@ class MhrvVpnService : VpnService() {
291314 * tun2proxy still forwarding packets into a half-dead Rust runtime
292315 * while the runtime is force-aborting its tasks — that's the scenario
293316 * that manifested as "Stop crashes the app" when there were in-flight
294- * relay requests piled up against a dead Apps Script deployment. The
295- * correct order is:
296- * 1. Signal tun2proxy to stop (cooperative).
297- * 2. Close the TUN fd — forces tun2proxy's read() to return EBADF.
298- * 3. Join the tun2proxy thread (now it really will exit).
299- * 4. Shut down the Rust proxy runtime (nothing left to forward to).
317+ * relay requests piled up against a dead Apps Script deployment.
318+ *
319+ * Steps, with the bound on each one called out so a hung native call
320+ * cannot stall the whole teardown thread:
321+ * 1. Signal tun2proxy to stop (cooperative). Bounded by a 2s
322+ * side-thread join — if the JNI call hangs we proceed anyway.
323+ * 2. Drop our `ParcelFileDescriptor` reference. Because we already
324+ * called detachFd() at startup, this is a no-op for the
325+ * underlying fd — the worker (closeFdOnDrop=true) owns it.
326+ * We keep the call only so the PROXY_ONLY / failed-establish
327+ * paths still null out the field cleanly.
328+ * 3. Join the tun2proxy thread, bounded at 4s. If the worker is
329+ * stuck we log and move on — the runtime shutdown below will
330+ * knock the rest of the world over.
331+ * 4. Shut down the Rust proxy runtime, bounded by `rt.shutdown_timeout`
332+ * on the Rust side (5s). This is the hard backstop: the listener
333+ * socket is released here regardless of what the worker is doing.
300334 */
301335 private fun teardown () {
302336 // Idempotency guard. Without this, onDestroy racing the
@@ -315,17 +349,29 @@ class MhrvVpnService : VpnService() {
315349 " (tun2proxy running=${tun2proxyRunning.get()} , proxyHandle=$proxyHandle )" ,
316350 )
317351
318- // 1. Cooperative stop signal.
352+ // 1. Cooperative stop signal — bounded so a hung Rust call cannot
353+ // stall the entire teardown thread. We've never observed
354+ // Tun2proxy.stop() block in practice, but the contract isn't
355+ // documented as bounded and the rest of teardown already takes
356+ // care to be timeout-bounded; this closes the gap.
319357 if (tun2proxyRunning.get()) {
320- try { Tun2proxy .stop() } catch (t: Throwable ) {
321- Log .w(TAG , " Tun2proxy.stop: ${t.message} " )
358+ val stopper = Thread ({
359+ try { Tun2proxy .stop() } catch (t: Throwable ) {
360+ Log .w(TAG , " Tun2proxy.stop: ${t.message} " )
361+ }
362+ }, " mhrv-tun2proxy-stop" ).apply { start() }
363+ try { stopper.join(2_000 ) } catch (_: InterruptedException ) {}
364+ if (stopper.isAlive) {
365+ Log .w(TAG , " Tun2proxy.stop did not return within 2s — proceeding" )
322366 }
323367 }
324368
325- // 2. Close the TUN fd. Since we called detachFd earlier the
326- // ParcelFileDescriptor no longer owns the fd and close() here
327- // is a no-op; the real fd is owned by tun2proxy (closeFdOnDrop
328- // = true), which closes it on return from run().
369+ // 2. Drop our PFD reference. detachFd at startup means this
370+ // close() is a no-op for the underlying fd — tun2proxy owns
371+ // it (closeFdOnDrop = true) and closes it on return from
372+ // run(). The call is kept only to null the field cleanly on
373+ // paths that never reached detachFd (PROXY_ONLY, or an
374+ // establish() that failed mid-builder).
329375 try { tun?.close() } catch (t: Throwable ) {
330376 Log .w(TAG , " tun.close: ${t.message} " )
331377 }
0 commit comments