66import asyncio
77import os
88import shutil
9+ import signal
910import socket
1011import subprocess
1112import sys
@@ -179,27 +180,64 @@ def aks_bastion_extension(yes):
179180 raise CLIInternalError (f"Failed to install bastion extension: { result .error } " )
180181
181182
def aks_bastion_set_kubeconfig(kubeconfig_path, port, cluster_name=None):
    """Point one cluster entry of a kubeconfig file at the local bastion tunnel.

    The selected cluster's ``server`` is rewritten to ``https://localhost:<port>/`` and
    ``tls-server-name`` is set to the hostname of the original server URL. The file at
    ``kubeconfig_path`` is modified in place.

    Args:
        kubeconfig_path: Path to the kubeconfig file.
        port: Local port for the bastion tunnel.
        cluster_name: Name of the AKS cluster. If provided, the cluster entry with exactly
            this name is updated. If not provided, the cluster referenced by the
            current context is updated (for newly downloaded kubeconfigs).

    Raises:
        CLIInternalError: If the file holds no kubeconfig mapping, or the cluster to
            update cannot be determined or located.
    """

    logger.debug("Updating kubeconfig file: %s to use port: %s", kubeconfig_path, port)
    with open(kubeconfig_path, "r") as f:
        data = yaml.load(f, Loader=yaml.SafeLoader)

    # An empty file loads as None; fail with a clear message instead of an AttributeError.
    if not isinstance(data, dict):
        raise CLIInternalError(
            f"Kubeconfig file '{kubeconfig_path}' is empty or is not a valid kubeconfig."
        )

    # A section that is present but null yields None even when a default is supplied
    # to .get(), so normalise both sections to lists before iterating.
    clusters = data.get("clusters") or []
    contexts = data.get("contexts") or []

    def find_cluster_entry(name):
        return next((entry for entry in clusters if entry.get("name") == name), None)

    if cluster_name:
        # For existing kubeconfigs, search for exact match in clusters
        logger.debug("Searching for cluster '%s' in existing kubeconfig", cluster_name)
        target_cluster_name = cluster_name
        target_entry = find_cluster_entry(cluster_name)
        if target_entry is None:
            raise CLIInternalError(
                f"Could not find cluster '{cluster_name}' in the provided kubeconfig. "
                "The cluster name from Azure might differ from the name in your kubeconfig file."
            )
        logger.debug("Found exact match for cluster name: %s", target_cluster_name)
    else:
        # If cluster_name not provided, use current context
        target_cluster_name = None
        current_context = data.get("current-context")
        if current_context:
            for context in contexts:
                if context.get("name") == current_context:
                    target_cluster_name = (context.get("context") or {}).get("cluster")
                    break

        if not target_cluster_name:
            raise CLIInternalError("Could not determine which cluster to update in kubeconfig")
        logger.debug("Using current context cluster: %s", target_cluster_name)

        # The context may reference a cluster that has no entry; writing the file back
        # unchanged would leave kubectl pointing at the real endpoint, not the tunnel.
        target_entry = find_cluster_entry(target_cluster_name)
        if target_entry is None:
            raise CLIInternalError(
                f"Cluster '{target_cluster_name}' referenced by context '{current_context}' "
                "was not found in kubeconfig"
            )

    # Update the cluster configuration
    cluster_config = target_entry["cluster"]
    hostname = urlparse(cluster_config["server"]).hostname

    # If the entry already targets a loopback address and carries a tls-server-name
    # (e.g. this function already rewrote it on an earlier run), keep that name;
    # replacing it with "localhost" would discard the API server's real hostname.
    previous_tls_name = cluster_config.get("tls-server-name")
    if previous_tls_name and hostname in ("localhost", "127.0.0.1", "::1"):
        hostname = previous_tls_name

    # update the server URL to point to the local port
    cluster_config["server"] = f"https://localhost:{port}/"
    # set the tls-server-name to the hostname
    cluster_config["tls-server-name"] = hostname
    logger.debug(
        "Updated cluster '%s' to use localhost:%s with tls-server-name=%s",
        target_cluster_name, port, hostname,
    )

    with open(kubeconfig_path, "w") as f:
        yaml.dump(data, f)
205243
@@ -441,31 +479,45 @@ async def _aks_bastion_launch_tunnel(bastion_resource, port, mc_id):
441479 f"--name { bastion_resource .name } --port { port } --target-resource-id { mc_id } --resource-port 443"
442480 )
443481 logger .warning ("Creating bastion tunnel with command: '%s'" , cmd )
482+
483+ # Use start_new_session on Unix to create a new process group
484+ # This allows us to kill the entire process tree when cleaning up
485+ start_new_session = not sys .platform .startswith ("win" )
444486 tunnel_proces = await asyncio .create_subprocess_exec (
445487 * (cmd .split ()),
446488 stdin = asyncio .subprocess .DEVNULL ,
447489 stdout = asyncio .subprocess .DEVNULL ,
448490 stderr = asyncio .subprocess .DEVNULL ,
449491 shell = False ,
492+ start_new_session = start_new_session ,
450493 )
451494 logger .info ("Tunnel launched with PID: %s" , tunnel_proces .pid )
452495
453496 # tunnel process must not exit unless it encounters a failure or is deliberately shut down
454497 await tunnel_proces .wait ()
455498 logger .error ("Bastion tunnel exited with code %s" , tunnel_proces .returncode )
456499 except asyncio .CancelledError :
457- # attempt to terminate the tunnel process gracefully
500+ # attempt to terminate the tunnel process and all its children
458501 if tunnel_proces is not None :
459- logger .info ("Tunnel process was cancelled. Terminating..." )
460- tunnel_proces . terminate ( )
502+ logger .info ("Tunnel process was cancelled. Terminating process tree ..." )
503+ _aks_bastion_kill_process_tree ( tunnel_proces )
461504 try :
462505 await asyncio .wait_for (tunnel_proces .wait (), timeout = 5 )
463506 logger .info ("Tunnel process exited cleanly after termination." )
464507 except asyncio .TimeoutError :
465508 logger .warning (
466- "Tunnel process did not exit after SIGTERM. Sending SIGKILL ..."
509+ "Tunnel process did not exit after SIGTERM. Force killing ..."
467510 )
468- tunnel_proces .kill ()
511+ if sys .platform .startswith ("win" ):
512+ # On Windows, taskkill /F should have already force-killed
513+ # but try again with kill() as fallback
514+ tunnel_proces .kill ()
515+ else :
516+ # On Unix, send SIGKILL to the process group
517+ try :
518+ os .killpg (os .getpgid (tunnel_proces .pid ), signal .SIGKILL )
519+ except (ProcessLookupError , PermissionError ):
520+ tunnel_proces .kill ()
469521 await asyncio .wait_for (tunnel_proces .wait (), timeout = 5 )
470522 logger .warning (
471523 "Tunnel process forcefully killed with code %s" ,
@@ -475,6 +527,39 @@ async def _aks_bastion_launch_tunnel(bastion_resource, port, mc_id):
475527 logger .warning ("Tunnel process was cancelled before it could be launched." )
476528
477529
530+ def _aks_bastion_kill_process_tree (process ):
531+ """Kill a process and all its children.
532+
533+ On Windows, az.cmd spawns a child Python process, so we need to kill the entire
534+ process tree to avoid orphaned processes.
535+ """
536+ if process is None :
537+ return
538+
539+ pid = process .pid
540+ if sys .platform .startswith ("win" ):
541+ # On Windows, use taskkill with /T flag to kill the process tree
542+ try :
543+ subprocess .run (
544+ ["taskkill" , "/T" , "/F" , "/PID" , str (pid )],
545+ capture_output = True ,
546+ check = False ,
547+ )
548+ logger .debug ("Killed process tree for PID %s using taskkill" , pid )
549+ except Exception as e : # pylint: disable=broad-except
550+ logger .warning ("Failed to kill process tree with taskkill: %s" , e )
551+ # Fallback to terminate/kill
552+ process .terminate ()
553+ else :
554+ # On Unix, kill the process group
555+ try :
556+ os .killpg (os .getpgid (pid ), signal .SIGTERM )
557+ logger .debug ("Sent SIGTERM to process group for PID %s" , pid )
558+ except (ProcessLookupError , PermissionError ) as e :
559+ logger .debug ("Failed to kill process group: %s" , e )
560+ process .terminate ()
561+
562+
478563async def _aks_bastion_validate_tunnel (port ):
479564 """Check if the bastion tunnel is active on the specified port."""
480565 # give the tunnel some time to establish before checking the port
0 commit comments