Skip to content

Commit 4b4f6d0

Browse files
committed
mig
1 parent 7be5e1b commit 4b4f6d0

3 files changed

Lines changed: 64 additions & 3 deletions

File tree

src/aks-preview/azcli_aks_live_test/configs/ext_matrix_default.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
"test_aks_nodepool_add_with_gpu_instance_profile",
2525
"test_aks_gpu_driver_type",
2626
"test_aks_nodepool_add_with_enable_managed_gpu",
27-
"test_aks_nodepool_update_with_enable_managed_gpu"
27+
"test_aks_nodepool_update_with_enable_managed_gpu",
28+
"test_aks_nodepool_add_with_gpu_mig_strategy"
2829
],
2930
"pod ip allocation mode static block, missing feature registration": [
3031
"test_aks_create_with_pod_ip_allocation_mode_static_block"

src/aks-preview/azext_aks_preview/agentpool_decorator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,6 +1395,7 @@ def set_up_gpu_mig_strategy(self, agentpool: AgentPool) -> AgentPool:
13951395
if agentpool.gpu_profile.nvidia is None:
13961396
agentpool.gpu_profile.nvidia = self.models.NvidiaGPUProfile() # pylint: disable=no-member
13971397
agentpool.gpu_profile.nvidia.mig_strategy = gpu_mig_strategy
1398+
agentpool.gpu_profile.driver = CONST_GPU_DRIVER_INSTALL
13981399
return agentpool
13991400

14001401
def set_up_pod_ip_allocation_mode(self, agentpool: AgentPool) -> AgentPool:
@@ -1778,6 +1779,7 @@ def update_gpu_mig_strategy(self, agentpool: AgentPool) -> AgentPool:
17781779
if agentpool.gpu_profile.nvidia is None:
17791780
agentpool.gpu_profile.nvidia = self.models.NvidiaGPUProfile() # pylint: disable=no-member
17801781
agentpool.gpu_profile.nvidia.mig_strategy = gpu_mig_strategy
1782+
agentpool.gpu_profile.driver = CONST_GPU_DRIVER_INSTALL
17811783
return agentpool
17821784

17831785
def update_artifact_streaming(self, agentpool: AgentPool) -> AgentPool:

src/aks-preview/azext_aks_preview/tests/latest/test_aks_commands.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5515,6 +5515,64 @@ def test_aks_nodepool_add_with_gpu_instance_profile(
55155515
checks=[self.is_empty()],
55165516
)
55175517

5518+
@live_only()
5519+
@AllowLargeResponse()
5520+
@AKSCustomResourceGroupPreparer(
5521+
random_name_length=17, name_prefix="clitest", location="westus3"
5522+
)
5523+
def test_aks_nodepool_add_with_gpu_mig_strategy(
5524+
self, resource_group, resource_group_location
5525+
):
5526+
aks_name = self.create_random_name("cliakstest", 16)
5527+
node_pool_name = self.create_random_name("c", 6)
5528+
node_pool_name_second = self.create_random_name("c", 6)
5529+
self.kwargs.update(
5530+
{
5531+
"resource_group": resource_group,
5532+
"name": aks_name,
5533+
"node_pool_name": node_pool_name,
5534+
"node_pool_name_second": node_pool_name_second,
5535+
"ssh_key_value": self.generate_ssh_keys(),
5536+
}
5537+
)
5538+
5539+
create_cmd = (
5540+
"aks create --resource-group={resource_group} --name={name} "
5541+
"--nodepool-name {node_pool_name} -c 1 "
5542+
"--ssh-key-value={ssh_key_value}"
5543+
)
5544+
self.cmd(
5545+
create_cmd,
5546+
checks=[
5547+
self.check("provisioningState", "Succeeded"),
5548+
],
5549+
)
5550+
5551+
# nodepool add with gpu-mig-strategy
5552+
self.cmd(
5553+
"aks nodepool add "
5554+
"--resource-group={resource_group} "
5555+
"--cluster-name={name} "
5556+
"--name={node_pool_name_second} "
5557+
"--enable-managed-gpu=true "
5558+
"--gpu-instance-profile=MIG3g "
5559+
"--gpu-mig-strategy=Single "
5560+
"-c 1 "
5561+
"--aks-custom-headers UseGPUDedicatedVHD=true "
5562+
"--node-vm-size=Standard_NC24ads_A100_v4",
5563+
checks=[
5564+
self.check("provisioningState", "Succeeded"),
5565+
self.check("gpuInstanceProfile", "MIG3g"),
5566+
self.check("gpuProfile.nvidia.migStrategy", "Single"),
5567+
],
5568+
)
5569+
5570+
# delete
5571+
self.cmd(
5572+
"aks delete -g {resource_group} -n {name} --yes --no-wait",
5573+
checks=[self.is_empty()],
5574+
)
5575+
55185576
@live_only() # live only because the workspace is not mocked correctly and the role assignment is not mocked
55195577
@AllowLargeResponse()
55205578
@AKSCustomResourceGroupPreparer(
@@ -6907,7 +6965,7 @@ def test_aks_nodepool_add_with_enable_managed_gpu(
69076965
self.cmd(
69086966
"aks nodepool add --resource-group={resource_group} --cluster-name={name} --name={node_pool_name} "
69096967
"--node-vm-size={node_vm_size} --node-count 1 "
6910-
" --enable-managed-gpu",
6968+
"--enable-managed-gpu=true ",
69116969
checks=[
69126970
self.check("provisioningState", "Succeeded"),
69136971
self.check("gpuProfile.driver", "Install"),
@@ -16644,7 +16702,7 @@ def test_aks_nodepool_update_with_enable_managed_gpu(
1664416702
"--resource-group={resource_group} "
1664516703
"--cluster-name={name} "
1664616704
"--name={node_pool_name} "
16647-
"--enable-managed-gpu",
16705+
"--enable-managed-gpu=true ",
1664816706
checks=[
1664916707
self.check("provisioningState", "Succeeded"),
1665016708
self.check("gpuProfile.driver", "Install"),

0 commit comments

Comments
 (0)