Skip to content

Commit 4c3fcca

Browse files
committed
fix bastion tunnel close issue
1 parent 6d5d041 commit 4c3fcca

File tree

1 file changed

+53
-5
lines changed
  • src/aks-preview/azext_aks_preview/bastion

1 file changed

+53
-5
lines changed

src/aks-preview/azext_aks_preview/bastion/bastion.py

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import asyncio
77
import os
88
import shutil
9+
import signal
910
import socket
1011
import subprocess
1112
import sys
@@ -478,31 +479,45 @@ async def _aks_bastion_launch_tunnel(bastion_resource, port, mc_id):
478479
f"--name {bastion_resource.name} --port {port} --target-resource-id {mc_id} --resource-port 443"
479480
)
480481
logger.warning("Creating bastion tunnel with command: '%s'", cmd)
482+
483+
# Use start_new_session on Unix to create a new process group
484+
# This allows us to kill the entire process tree when cleaning up
485+
start_new_session = not sys.platform.startswith("win")
481486
tunnel_proces = await asyncio.create_subprocess_exec(
482487
*(cmd.split()),
483488
stdin=asyncio.subprocess.DEVNULL,
484489
stdout=asyncio.subprocess.DEVNULL,
485490
stderr=asyncio.subprocess.DEVNULL,
486491
shell=False,
492+
start_new_session=start_new_session,
487493
)
488494
logger.info("Tunnel launched with PID: %s", tunnel_proces.pid)
489495

490496
# tunnel process must not exit unless it encounters a failure or is deliberately shut down
491497
await tunnel_proces.wait()
492498
logger.error("Bastion tunnel exited with code %s", tunnel_proces.returncode)
493499
except asyncio.CancelledError:
494-
# attempt to terminate the tunnel process gracefully
500+
# attempt to terminate the tunnel process and all its children
495501
if tunnel_proces is not None:
496-
logger.info("Tunnel process was cancelled. Terminating...")
497-
tunnel_proces.terminate()
502+
logger.info("Tunnel process was cancelled. Terminating process tree...")
503+
_aks_bastion_kill_process_tree(tunnel_proces)
498504
try:
499505
await asyncio.wait_for(tunnel_proces.wait(), timeout=5)
500506
logger.info("Tunnel process exited cleanly after termination.")
501507
except asyncio.TimeoutError:
502508
logger.warning(
503-
"Tunnel process did not exit after SIGTERM. Sending SIGKILL..."
509+
"Tunnel process did not exit after SIGTERM. Force killing..."
504510
)
505-
tunnel_proces.kill()
511+
if sys.platform.startswith("win"):
512+
# On Windows, taskkill /F should have already force-killed
513+
# but try again with kill() as fallback
514+
tunnel_proces.kill()
515+
else:
516+
# On Unix, send SIGKILL to the process group
517+
try:
518+
os.killpg(os.getpgid(tunnel_proces.pid), signal.SIGKILL)
519+
except (ProcessLookupError, PermissionError):
520+
tunnel_proces.kill()
506521
await asyncio.wait_for(tunnel_proces.wait(), timeout=5)
507522
logger.warning(
508523
"Tunnel process forcefully killed with code %s",
@@ -512,6 +527,39 @@ async def _aks_bastion_launch_tunnel(bastion_resource, port, mc_id):
512527
logger.warning("Tunnel process was cancelled before it could be launched.")
513528

514529

530+
def _aks_bastion_kill_process_tree(process):
531+
"""Kill a process and all its children.
532+
533+
On Windows, az.cmd spawns a child Python process, so we need to kill the entire
534+
process tree to avoid orphaned processes.
535+
"""
536+
if process is None:
537+
return
538+
539+
pid = process.pid
540+
if sys.platform.startswith("win"):
541+
# On Windows, use taskkill with /T flag to kill the process tree
542+
try:
543+
subprocess.run(
544+
["taskkill", "/T", "/F", "/PID", str(pid)],
545+
capture_output=True,
546+
check=False,
547+
)
548+
logger.debug("Killed process tree for PID %s using taskkill", pid)
549+
except Exception as e:
550+
logger.warning("Failed to kill process tree with taskkill: %s", e)
551+
# Fallback to terminate/kill
552+
process.terminate()
553+
else:
554+
# On Unix, kill the process group
555+
try:
556+
os.killpg(os.getpgid(pid), signal.SIGTERM)
557+
logger.debug("Sent SIGTERM to process group for PID %s", pid)
558+
except (ProcessLookupError, PermissionError) as e:
559+
logger.debug("Failed to kill process group: %s", e)
560+
process.terminate()
561+
562+
515563
async def _aks_bastion_validate_tunnel(port):
516564
"""Check if the bastion tunnel is active on the specified port."""
517565
# give the tunnel some time to establish before checking the port

0 commit comments

Comments
 (0)