Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/aks-preview/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ If there is no rush to release a new version, please just add a description of t

To release a new version, please select a new version number (usually plus 1 to last patch version, X.Y.Z -> Major.Minor.Patch, more details in `\doc <https://semver.org/>`_), and then add a new section named as the new version number in this file, the content should include the new modifications and everything from the *Pending* section. Finally, update the `VERSION` variable in `setup.py` with this new version number.

9.0.0b4
+++++++
* Add `--driver-type` to the `az aks nodepool add` command.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔎
Please make sure the placeholder and URLs are formatted properly, as per the guidelines:

+To release a new version, please select a new version number (usually plus 1 to last patch version, X.Y.Z -> Major.Minor.Patch, more details in `\doc <https://semver.org/>`_), and then add a new section named as the new version number in this file, the content should include the new modifications and everything from the *Pending* section. Finally, update the `VERSION` variable in `setup.py` with this new version number.
  • Replace \doc <https://semver.org/>_ with appropriate surrounding HTML tags or use backticks for placeholders and ensure the URL link is marked as an example if not accessible.

9.0.0b3
+++++++
* Add `--undrainable-node-behavior` to the `az aks nodepool add/update/upgrade` commands.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
"test_aks_disable_addon_gitops"
],
"gpu, no quota": [
"test_aks_nodepool_add_with_gpu_instance_profile"
"test_aks_nodepool_add_with_gpu_instance_profile",
"test_aks_gpu_driver_type"
],
"overlay migration, missing toggle": [
"test_aks_azure_cni_overlay_migration"
Expand Down
4 changes: 4 additions & 0 deletions src/aks-preview/azext_aks_preview/_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,7 @@
# TLS Management Consts
CONST_TLS_MANAGEMENT_MANAGED = "Managed"
CONST_TLS_MANAGEMENT_NONE = "None"

# GPU Driver Type Consts
# String values accepted for the GPU driver type of a node pool
# (exposed on the command line as the `--driver-type` option).
CONST_GPU_DRIVER_TYPE_CUDA = "CUDA"
CONST_GPU_DRIVER_TYPE_GRID = "GRID"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔎
Consider using an enumeration to define the constants for better code organization and type safety.

from enum import Enum

class TlsManagement(Enum):
    """TLS management modes; the member values are the strings "Managed" and "None"."""

    MANAGED = "Managed"
    NONE = "None"

class GpuDriverType(Enum):
    """GPU driver types; the member values are the strings "CUDA" and "GRID"."""

    CUDA = "CUDA"
    GRID = "GRID"

3 changes: 3 additions & 0 deletions src/aks-preview/azext_aks_preview/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -1861,6 +1861,9 @@
- name: --skip-gpu-driver-install
type: bool
short-summary: To skip GPU driver auto installation by AKS on a nodepool using GPU vm size if customers want to manage GPU driver installation by their own. If not specified, the default is false.
- name: --driver-type
type: string
short-summary: Specify the type of GPU driver to install when creating Windows agent pools. Valid values are "GRID" and "CUDA". If not provided, AKS selects the driver based on system compatibility. This option cannot be changed once the AgentPool has been created. The default is system selected.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔎
--driver-type should use backticks for the placeholders "GRID" and "CUDA" to clearly denote them as specific values:

- short-summary: Specify the type of GPU driver to install when creating Windows agent pools. Valid values are "GRID" and "CUDA". 
+ short-summary: Specify the type of GPU driver to install when creating Windows agent pools. Valid values are `GRID` and `CUDA`. 

- name: --ssh-access
type: string
short-summary: Configure SSH setting for the node pool. Use "disabled" to disable SSH access, "localuser" to enable SSH access using private key.
Expand Down
12 changes: 12 additions & 0 deletions src/aks-preview/azext_aks_preview/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@
CONST_APP_ROUTING_NONE_NGINX,
CONST_TLS_MANAGEMENT_MANAGED,
CONST_TLS_MANAGEMENT_NONE,
CONST_GPU_DRIVER_TYPE_CUDA,
CONST_GPU_DRIVER_TYPE_GRID,
)
from azext_aks_preview._validators import (
validate_acr,
Expand Down Expand Up @@ -420,6 +422,11 @@
CONST_TLS_MANAGEMENT_NONE,
]

# Allowed choices for the `driver_type` argument; this list is handed to
# get_enum_type() when the argument is registered in load_arguments.
gpu_driver_types = [
CONST_GPU_DRIVER_TYPE_CUDA,
CONST_GPU_DRIVER_TYPE_GRID,
]


def load_arguments(self, _):
acr_arg_type = CLIArgumentType(metavar="ACR_NAME_OR_RESOURCE_ID")
Expand Down Expand Up @@ -1589,6 +1596,11 @@ def load_arguments(self, _):
help="space-separated tags: key[=value] [key[=value] ...].",
)
c.argument('skip_gpu_driver_install', action='store_true', is_preview=True)
c.argument(
"driver_type",
arg_type=get_enum_type(gpu_driver_types),
is_preview=True,
)
# in creation scenario, use "localuser" as default
c.argument(
'ssh_access',
Expand Down
31 changes: 31 additions & 0 deletions src/aks-preview/azext_aks_preview/agentpool_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,24 @@ def get_skip_gpu_driver_install(self) -> bool:

return skip_gpu_driver_install

def get_driver_type(self) -> Union[str, None]:
    """Obtain the value of driver_type.

    The value passed by the command (raw parameter "driver_type") is used by
    default. In create mode, a non-None `driver_type` already present on the
    `gpu_profile` of the attached `agentpool` object takes precedence.

    :return: str or None
    """
    # value as supplied by the command invocation
    raw_value = self.raw_param.get("driver_type")

    # only create mode consults the `agentpool` object
    if not self.decorator_mode == DecoratorMode.CREATE:
        return raw_value

    pool = self.agentpool
    if not pool:
        return raw_value

    gpu_profile = pool.gpu_profile
    if gpu_profile is None:
        return raw_value

    # a value already recorded on the agentpool wins over the raw parameter
    existing = gpu_profile.driver_type
    return raw_value if existing is None else existing

def get_enable_secure_boot(self) -> bool:
"""Obtain the value of enable_secure_boot.
:return: bool
Expand Down Expand Up @@ -867,6 +885,17 @@ def set_up_skip_gpu_driver_install(self, agentpool: AgentPool) -> AgentPool:
agentpool.gpu_profile.install_gpu_driver = False
return agentpool

def set_up_driver_type(self, agentpool: AgentPool) -> AgentPool:
    """Set up driver type property for the AgentPool object.

    Reads the driver type from the context. When it is None the agentpool is
    returned unchanged; otherwise the GPU profile is created if it is missing
    and its `driver_type` is set to the requested value.

    :return: the AgentPool object
    """
    self._ensure_agentpool(agentpool)

    requested_type = self.context.get_driver_type()
    if requested_type is None:
        # nothing requested: leave any existing GPU profile untouched
        return agentpool

    if agentpool.gpu_profile is None:
        # create the profile on demand so the driver type can be stored on it
        agentpool.gpu_profile = self.models.AgentPoolGPUProfile()  # pylint: disable=no-member
    agentpool.gpu_profile.driver_type = requested_type
    return agentpool

def set_up_pod_ip_allocation_mode(self, agentpool: AgentPool) -> AgentPool:
"""Set up pod ip allocation mode for the AgentPool object."""
self._ensure_agentpool(agentpool)
Expand Down Expand Up @@ -968,6 +997,8 @@ def construct_agentpool_profile_preview(self) -> AgentPool:
agentpool = self.set_up_artifact_streaming(agentpool)
# set up skip_gpu_driver_install
agentpool = self.set_up_skip_gpu_driver_install(agentpool)
# set up driver_type
agentpool = self.set_up_driver_type(agentpool)
# set up agentpool ssh access
agentpool = self.set_up_ssh_access(agentpool)
# set up agentpool pod ip allocation mode
Expand Down
1 change: 1 addition & 0 deletions src/aks-preview/azext_aks_preview/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -1215,6 +1215,7 @@ def aks_agentpool_add(
node_public_ip_tags=None,
enable_artifact_streaming=False,
skip_gpu_driver_install=False,
driver_type=None,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔎
Ensure the new parameter driver_type is documented and handled appropriately within the function. If driver_type needs validation or default values, add those. For example:

if driver_type is not None:
    # Validate driver_type options or set default if needed
    valid_driver_types = ['type1', 'type2']
    if driver_type not in valid_driver_types:
        raise ValueError(f"Invalid driver_type: {driver_type}. Must be one of {valid_driver_types}")

ssh_access=CONST_SSH_ACCESS_LOCALUSER,
# trusted launch
enable_secure_boot=False,
Expand Down
Loading