Skip to content

Commit ba23d62

Browse files
committed
WIP: stash changes for blue green aks-preview changes
need to add tests and debug further
1 parent da5b0bf commit ba23d62

7 files changed

Lines changed: 428 additions & 15 deletions

File tree

src/aks-preview/azext_aks_preview/_consts.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@
109109
CONST_NODE_IMAGE_UPGRADE_CHANNEL = "node-image"
110110
CONST_NONE_UPGRADE_CHANNEL = "none"
111111

112+
# consts for upgrade strategy
113+
CONST_UPGRADE_STRATEGY_ROLLING = "Rolling"
114+
CONST_UPGRADE_STRATEGY_BLUE_GREEN = "BlueGreen"
115+
112116
# consts for node os upgrade channel
113117
CONST_NODE_OS_CHANNEL_NODE_IMAGE = "NodeImage"
114118
CONST_NODE_OS_CHANNEL_NONE = "None"

src/aks-preview/azext_aks_preview/_help.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1879,7 +1879,6 @@
18791879
short-summary: List node pools in the managed Kubernetes cluster.
18801880
"""
18811881

1882-
# TODO: Add params for blue green upgrade settings
18831882
helps['aks nodepool add'] = """
18841883
type: command
18851884
short-summary: Add a node pool to the managed Kubernetes cluster.
@@ -2074,6 +2073,24 @@
20742073
- name: --localdns-config
20752074
type: string
20762075
short-summary: Set the localDNS Profile for a nodepool with a JSON config file.
2076+
- name: --upgrade-strategy
2077+
type: string
2078+
short-summary: Upgrade strategy for the node pool. Allowed values: "Rolling" or "BlueGreen". Default: "Rolling".
2079+
- name: --drain-batch-size
2080+
type: string
2081+
short-summary: Number or percentage of nodes to drain per batch during blue-green upgrades. Accepts an integer (e.g. '5') or percentage (e.g. '50%'). Default: 10%.
2082+
long-summary: |-
2083+
Specifies how many nodes to drain in each batch during a blue-green upgrade. Must be a non-zero value, either as an integer (e.g. '5') or a percentage (e.g. '50%') of the total blue nodes at the start of the upgrade. Fractional nodes are rounded up. For more details and best practices, see: https://learn.microsoft.com/en-us/azure/aks/upgrade-cluster
2084+
- name: --drain-timeout-bg
2085+
type: int
2086+
short-summary: Timeout (in minutes) to evict pods and gracefully terminate per node during blue-green upgrades. Default: 30 minutes.
2087+
long-summary: Maximum time (in minutes) to wait for pod eviction and graceful termination per node during blue-green upgrades. Honors pod disruption budgets. If exceeded, the upgrade fails. Default: 30 minutes.
2088+
- name: --batch-soak-duration
2089+
type: int
2090+
short-summary: Wait time (in minutes) after draining a batch of nodes before proceeding to the next batch. Default: 15 minutes. Only for blue-green upgrades.
2091+
- name: --final-soak-duration
2092+
type: int
2093+
short-summary: Wait time (in minutes) after all old nodes are drained before removing them. Default: 60 minutes. Only for blue-green upgrades.
20772094
examples:
20782095
- name: Create a nodepool in an existing AKS cluster with ephemeral os enabled.
20792096
text: az aks nodepool add -g MyResourceGroup -n nodepool1 --cluster-name MyManagedCluster --node-osdisk-type Ephemeral --node-osdisk-size 48
@@ -2095,6 +2112,8 @@
20952112
text: az aks nodepool add -g MyResourceGroup -n nodepool1 --cluster-name MyManagedCluster --vm-set-type VirtualMachines --vm-sizes "Standard_D4s_v3,Standard_D8s_v3" --node-count 3
20962113
- name: Create a nodepool with ManagedSystem mode
20972114
text: az aks nodepool add -g MyResourceGroup -n managedsystem1 --cluster-name MyManagedCluster --mode ManagedSystem
2115+
- name: Create a node pool with blue-green upgrade strategy and default parameters
2116+
text: az aks nodepool add -g MyResourceGroup -n nodepool1 --cluster-name MyManagedCluster --upgrade-strategy BlueGreen
20982117
"""
20992118

21002119
helps['aks nodepool scale'] = """
@@ -2149,6 +2168,24 @@
21492168
- name: --undrainable-node-behavior
21502169
type: string
21512170
short-summary: Define the behavior for undrainable nodes during upgrade. The value should be "Cordon" or "Schedule". The default value is "Schedule".
2171+
- name: --upgrade-strategy
2172+
type: string
2173+
short-summary: Upgrade strategy for the node pool. Allowed values: "Rolling" or "BlueGreen". Default: "Rolling".
2174+
- name: --drain-batch-size
2175+
type: string
2176+
short-summary: Number or percentage of nodes to drain per batch during blue-green upgrades. Accepts an integer (e.g. '5') or percentage (e.g. '50%'). Default: 10%.
2177+
long-summary: |-
2178+
Specifies how many nodes to drain in each batch during a blue-green upgrade. Must be a non-zero value, either as an integer (e.g. '5') or a percentage (e.g. '50%') of the total blue nodes at the start of the upgrade. Fractional nodes are rounded up. For more details and best practices, see: https://learn.microsoft.com/en-us/azure/aks/upgrade-cluster
2179+
- name: --drain-timeout-bg
2180+
type: int
2181+
short-summary: Timeout (in minutes) to evict pods and gracefully terminate per node during blue-green upgrades. Default: 30 minutes.
2182+
long-summary: Maximum time (in minutes) to wait for pod eviction and graceful termination per node during blue-green upgrades. Honors pod disruption budgets. If exceeded, the upgrade fails. Default: 30 minutes.
2183+
- name: --batch-soak-duration
2184+
type: int
2185+
short-summary: Wait time (in minutes) after draining a batch of nodes before proceeding to the next batch. Default: 15 minutes. Only for blue-green upgrades.
2186+
- name: --final-soak-duration
2187+
type: int
2188+
short-summary: Wait time (in minutes) after all old nodes are drained before removing them. Default: 60 minutes. Only for blue-green upgrades.
21522189
"""
21532190

21542191
helps['aks nodepool update'] = """
@@ -2252,6 +2289,24 @@
22522289
- name: --localdns-config
22532290
type: string
22542291
short-summary: Set the localDNS Profile for a nodepool with a JSON config file.
2292+
- name: --upgrade-strategy
2293+
type: string
2294+
short-summary: Upgrade strategy for the node pool. Allowed values: "Rolling" or "BlueGreen". Default: "Rolling".
2295+
- name: --drain-batch-size
2296+
type: string
2297+
short-summary: Number or percentage of nodes to drain per batch during blue-green upgrades. Accepts an integer (e.g. '5') or percentage (e.g. '50%'). Default: 10%.
2298+
long-summary: |-
2299+
Specifies how many nodes to drain in each batch during a blue-green upgrade. Must be a non-zero value, either as an integer (e.g. '5') or a percentage (e.g. '50%') of the total blue nodes at the start of the upgrade. Fractional nodes are rounded up. For more details and best practices, see: https://learn.microsoft.com/en-us/azure/aks/upgrade-cluster
2300+
- name: --drain-timeout-bg
2301+
type: int
2302+
short-summary: Timeout (in minutes) to evict pods and gracefully terminate per node during blue-green upgrades. Default: 30 minutes.
2303+
long-summary: Maximum time (in minutes) to wait for pod eviction and graceful termination per node during blue-green upgrades. Honors pod disruption budgets. If exceeded, the upgrade fails. Default: 30 minutes.
2304+
- name: --batch-soak-duration
2305+
type: int
2306+
short-summary: Wait time (in minutes) after draining a batch of nodes before proceeding to the next batch. Default: 15 minutes. Only for blue-green upgrades.
2307+
- name: --final-soak-duration
2308+
type: int
2309+
short-summary: Wait time (in minutes) after all old nodes are drained before removing them. Default: 60 minutes. Only for blue-green upgrades.
22552310
examples:
22562311
- name: Reconcile the nodepool back to its current state.
22572312
text: az aks nodepool update -g MyResourceGroup -n nodepool1 --cluster-name MyManagedCluster
@@ -2263,6 +2318,8 @@
22632318
text: az aks nodepool update --update-cluster-autoscaler --min-count 1 --max-count 10 -g MyResourceGroup -n nodepool1 --cluster-name MyManagedCluster
22642319
- name: Change a node pool to system mode
22652320
text: az aks nodepool update --mode System -g MyResourceGroup -n nodepool1 --cluster-name MyManagedCluster
2321+
- name: Update a node pool with blue-green upgrade settings
2322+
text: az aks nodepool update -g MyResourceGroup -n nodepool1 --cluster-name MyManagedCluster --drain-batch-size 50% --drain-timeout-bg 5 --batch-soak-duration 10 --final-soak-duration 10
22662323
"""
22672324

22682325
helps['aks nodepool get-upgrades'] = """

src/aks-preview/azext_aks_preview/_params.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@
152152
CONST_ADVANCED_NETWORKPOLICIES_L7,
153153
CONST_TRANSIT_ENCRYPTION_TYPE_NONE,
154154
CONST_TRANSIT_ENCRYPTION_TYPE_WIREGUARD,
155+
CONST_UPGRADE_STRATEGY_ROLLING,
156+
CONST_UPGRADE_STRATEGY_BLUE_GREEN,
155157
)
156158

157159
from azext_aks_preview._validators import (
@@ -223,6 +225,7 @@
223225
validate_gateway_prefix_size,
224226
validate_max_unavailable,
225227
validate_max_blocked_nodes,
228+
validate_drain_batch_size,
226229
validate_resource_group_parameter,
227230
validate_location_resource_group_cluster_parameters,
228231
)
@@ -507,6 +510,11 @@
507510
CONST_GPU_DRIVER_TYPE_GRID,
508511
]
509512

513+
upgrade_strategies = [
514+
CONST_UPGRADE_STRATEGY_ROLLING,
515+
CONST_UPGRADE_STRATEGY_BLUE_GREEN,
516+
]
517+
510518

511519
def load_arguments(self, _):
512520
acr_arg_type = CLIArgumentType(metavar="ACR_NAME_OR_RESOURCE_ID")
@@ -1681,7 +1689,12 @@ def load_arguments(self, _):
16811689
c.argument("max_surge", validator=validate_max_surge)
16821690
c.argument("drain_timeout", type=int)
16831691
c.argument("node_soak_duration", type=int)
1684-
# TODO: Add args for blue green upgrades (and validators if needed)
1692+
# blue-green upgrade parameters
1693+
c.argument("upgrade_strategy", arg_type=get_enum_type(upgrade_strategies))
1694+
c.argument("drain_batch_size", validator=validate_drain_batch_size)
1695+
c.argument("drain_timeout_bg", type=int)
1696+
c.argument("batch_soak_duration", type=int)
1697+
c.argument("final_soak_duration", type=int)
16851698
c.argument("undrainable_node_behavior")
16861699
c.argument("max_unavailable", validator=validate_max_unavailable)
16871700
c.argument("max_blocked_nodes", validator=validate_max_blocked_nodes)
@@ -1821,6 +1834,12 @@ def load_arguments(self, _):
18211834
c.argument("drain_timeout", type=int)
18221835
c.argument("node_soak_duration", type=int)
18231836
c.argument("undrainable_node_behavior")
1837+
# blue-green upgrade parameters
1838+
c.argument("upgrade_strategy", arg_type=get_enum_type(upgrade_strategies))
1839+
c.argument("drain_batch_size", validator=validate_drain_batch_size)
1840+
c.argument("drain_timeout_bg", type=int)
1841+
c.argument("batch_soak_duration", type=int)
1842+
c.argument("final_soak_duration", type=int)
18241843
c.argument("max_unavailable", validator=validate_max_unavailable)
18251844
c.argument("max_blocked_nodes", validator=validate_max_blocked_nodes)
18261845
c.argument("mode", arg_type=get_enum_type(node_mode_types))
@@ -1898,6 +1917,12 @@ def load_arguments(self, _):
18981917
c.argument("drain_timeout", type=int)
18991918
c.argument("node_soak_duration", type=int)
19001919
c.argument("undrainable_node_behavior")
1920+
# blue-green upgrade parameters
1921+
c.argument("upgrade_strategy", arg_type=get_enum_type(upgrade_strategies))
1922+
c.argument("drain_batch_size", validator=validate_drain_batch_size)
1923+
c.argument("drain_timeout_bg", type=int)
1924+
c.argument("batch_soak_duration", type=int)
1925+
c.argument("final_soak_duration", type=int)
19011926
c.argument("max_unavailable", validator=validate_max_unavailable)
19021927
c.argument("max_blocked_nodes", validator=validate_max_blocked_nodes)
19031928
c.argument("snapshot_id", validator=validate_snapshot_id)

src/aks-preview/azext_aks_preview/_validators.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,23 @@ def validate_max_blocked_nodes(namespace):
530530
raise InvalidArgumentValueError('--max-blocked-nodes should be an int or percentage')
531531

532532

533+
def validate_drain_batch_size(namespace):
    """Validate --drain-batch-size as a positive integer or percentage.

    Accepts ``None`` (option not supplied), an integer string such as ``"5"``,
    or an integer followed by exactly one ``%`` such as ``"50%"``.

    :param namespace: parsed argument namespace; only ``drain_batch_size`` is read.
    :raises InvalidArgumentValueError: if the value is not an integer or
        percentage, or if it is zero or negative.
    """
    raw_value = namespace.drain_batch_size
    if raw_value is None:
        return

    # Drop exactly one trailing '%'. rstrip('%') would remove every trailing
    # percent sign and let malformed input such as "50%%" pass validation.
    number_text = raw_value[:-1] if raw_value.endswith('%') else raw_value

    # Keep the try body limited to the conversion so only parse failures are
    # translated into the "integer or percentage" error.
    try:
        value = int(number_text)
    except ValueError:
        raise InvalidArgumentValueError(
            '--drain-batch-size should be an integer or percentage (e.g., "5" or "50%")'
        ) from None

    if value <= 0:
        raise InvalidArgumentValueError('--drain-batch-size must be a non-zero value')
548+
549+
533550
def validate_assign_identity(namespace):
534551
if namespace.assign_identity is not None:
535552
if namespace.assign_identity == '':

0 commit comments

Comments
 (0)