Skip to content
1 change: 1 addition & 0 deletions src/azure-cli/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ Release History
* `az cognitiveservices agent logs show`: Add console log streaming for hosted agents (#32701)
* `az cognitiveservices agent create`: Add `--show-logs` flag for deployment troubleshooting (#32701)
* `az cognitiveservices agent start`: Add `--show-logs` and `--timeout` flags (#32701)
* [PREVIEW] `az cognitiveservices account managed-compute-deployment`: Add new command group for managing GPU-backed managed compute deployments with create, show, list, update, and delete operations
Comment thread
achauhan-scc marked this conversation as resolved.

**Container app**

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,9 @@ def cf_project_capability_hosts(cli_ctx, *_):
return get_cognitiveservices_management_client(cli_ctx).project_capability_hosts


def cf_managed_compute_deployments(cli_ctx, *_):
    """Client factory for the managed compute deployment commands.

    Builds the Cognitive Services management client for ``cli_ctx`` and
    returns its ``managed_compute_deployments`` attribute. Any additional
    positional arguments are accepted and ignored.
    """
    mgmt_client = get_cognitiveservices_management_client(cli_ctx)
    return mgmt_client.managed_compute_deployments


def cf_project_connections(cli_ctx, *_):
    """Client factory for the project connection commands.

    Builds the Cognitive Services management client for ``cli_ctx`` and
    returns its ``project_connections`` attribute. Any additional positional
    arguments are accepted and ignored.
    """
    mgmt_client = get_cognitiveservices_management_client(cli_ctx)
    return mgmt_client.project_connections
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,86 @@
text: az cognitiveservices account deployment list -g yuanyang-test-sdk -n yytest-oai
"""

# Help entries for the `cognitiveservices account managed-compute-deployment`
# command group and its five commands (create, show, list, update, delete).
# Each entry is keyed by the full command path; the string body uses the
# `type`, `short-summary`, optional `long-summary`, and `examples` keys.
helps[
"cognitiveservices account managed-compute-deployment"
] = """
type: group
short-summary: Manage managed compute deployments for Azure Cognitive Services accounts.
"""

# create: the single example passes a model URI, a deployment template URI,
# an accelerator type, a SKU name/capacity pair, and tags.
helps[
"cognitiveservices account managed-compute-deployment create"
] = """
type: command
short-summary: Create a managed compute deployment for Azure Cognitive Services account.
long-summary: Create a GPU-backed managed compute deployment associated with a Cognitive Services account.
examples:
- name: Create a managed compute deployment.
text: >
az cognitiveservices account managed-compute-deployment create
-g myResourceGroup -n myAccount
--deployment-name gpt-oss-120b-gpu
--model "azureml://registries/azureml-openai-oss/models/gpt-oss-120b/versions/4"
--deployment-template "azureml://registries/azureml-openai-oss/deploymenttemplates/gpt-oss-120b-short-context/versions/1"
--accelerator-type H100_80GB
--sku-name GlobalManagedCompute
--sku-capacity 1
--tags environment=production team=nlp
"""

# show: looks up one deployment by resource group, account, and deployment name.
helps[
"cognitiveservices account managed-compute-deployment show"
] = """
type: command
short-summary: Show a managed compute deployment for Azure Cognitive Services account.
examples:
- name: Show a managed compute deployment.
text: >
az cognitiveservices account managed-compute-deployment show
-g myResourceGroup -n myAccount
--deployment-name gpt-oss-120b-gpu
"""

# list: takes only the resource group and account; no deployment name.
helps[
"cognitiveservices account managed-compute-deployment list"
] = """
type: command
short-summary: List all managed compute deployments for Azure Cognitive Services account.
examples:
- name: List all managed compute deployments.
text: >
az cognitiveservices account managed-compute-deployment list
-g myResourceGroup -n myAccount
"""

# update: the long summary states the restriction that only SKU and tags are
# mutable; the example changes the SKU capacity without passing a SKU name.
helps[
"cognitiveservices account managed-compute-deployment update"
] = """
type: command
short-summary: Update a managed compute deployment for Azure Cognitive Services account.
long-summary: Only SKU (name/capacity) and tags can be updated. Model and accelerator type are immutable after creation.
examples:
- name: Update SKU capacity of a managed compute deployment.
text: >
az cognitiveservices account managed-compute-deployment update
-g myResourceGroup -n myAccount
--deployment-name gpt-oss-120b-gpu
--sku-capacity 2
"""

# delete: removes one deployment identified by resource group, account, and deployment name.
helps[
"cognitiveservices account managed-compute-deployment delete"
] = """
type: command
short-summary: Delete a managed compute deployment from Azure Cognitive Services account.
examples:
- name: Delete a managed compute deployment.
text: >
az cognitiveservices account managed-compute-deployment delete
-g myResourceGroup -n myAccount
--deployment-name gpt-oss-120b-gpu
"""

helps[
"cognitiveservices commitment-tier"
] = """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,24 @@ def load_arguments(self, _):
'scale_settings_capacity', options_list=['--scale-capacity', '--scale-settings-capacity'],
help='Cognitive Services account deployment scale settings capacity.')

with self.argument_context('cognitiveservices account managed-compute-deployment') as c:
c.argument('deployment_name', help='Managed compute deployment name.')

with self.argument_context('cognitiveservices account managed-compute-deployment create') as c:
c.argument('model', help='AzureML registry model URI '
'(e.g., azureml://registries/{registry}/models/{model}/versions/{version}).')
c.argument('deployment_template', options_list=['--deployment-template'],
help='AzureML registry deployment template URI '
'(e.g., azureml://registries/{registry}/deploymenttemplates/{template}/versions/{version}).')
c.argument('accelerator_type', options_list=['--accelerator-type'],
help='GPU accelerator type (e.g., H100_80GB).')
c.argument('version_upgrade_option', options_list=['--version-upgrade-option'],
help='Version upgrade policy. Allowed values: OnceNewDefaultVersionAvailable, '
'OnceCurrentVersionExpired, NoAutoUpgrade.')
Comment thread
achauhan-scc marked this conversation as resolved.

with self.argument_context('cognitiveservices account managed-compute-deployment update') as c:
c.argument('tags', tags_type)

with self.argument_context('cognitiveservices account commitment-plan') as c:
c.argument('commitment_plan_name', help='Cognitive Services account commitment plan name')
c.argument('plan_type', help='Cognitive Services account commitment plan type')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from azure.cli.command_modules.cognitiveservices._client_factory import cf_accounts, cf_resource_skus, \
cf_deleted_accounts, cf_deployments, cf_commitment_plans, cf_commitment_tiers, cf_models, cf_usages, \
cf_ai_projects, cf_account_connections, cf_projects, cf_project_connections, \
cf_managed_network_settings, cf_managed_network_provisions, cf_outbound_rule
cf_managed_network_settings, cf_managed_network_provisions, cf_outbound_rule, \
cf_managed_compute_deployments


def load_command_table(self, _):
Expand Down Expand Up @@ -197,3 +198,17 @@ def load_command_table(self, _):
setter_name='update',
setter_arg_name='connection',
custom_func_name='account_connection_update')

managed_compute_deployments_type = CliCommandType(
operations_tmpl='azure.mgmt.cognitiveservices.operations#ManagedComputeDeploymentsOperations.{}',
client_factory=cf_managed_compute_deployments
)

with self.command_group(
'cognitiveservices account managed-compute-deployment', managed_compute_deployments_type,
client_factory=cf_managed_compute_deployments, is_preview=True) as g:
g.custom_command('create', 'managed_compute_deployment_create')
g.custom_command('show', 'managed_compute_deployment_show')
g.custom_command('list', 'managed_compute_deployment_list')
g.custom_command('update', 'managed_compute_deployment_update')
g.custom_command('delete', 'managed_compute_deployment_delete')
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,68 @@ def deployment_begin_create_or_update(
return client.begin_create_or_update(resource_group_name, account_name, deployment_name, dpy, polling=False)


def managed_compute_deployment_create(
        client, resource_group_name, account_name, deployment_name,
        model, deployment_template=None, accelerator_type=None,
        version_upgrade_option=None,
        sku_name=None, sku_capacity=None, tags=None):
    """
    Create a managed compute deployment for Azure Cognitive Services account.

    Builds a ``ManagedComputeDeployment`` whose properties carry ``model``,
    ``deployment_template``, ``accelerator_type`` and
    ``version_upgrade_option``, optionally attaches a SKU and tags, and hands
    it to ``client.begin_create_or_update``.

    :param client: Operations object exposing ``begin_create_or_update``.
    :param resource_group_name: Resource group of the account.
    :param account_name: Cognitive Services account name.
    :param deployment_name: Name of the deployment to create.
    :param model: Model reference stored in the deployment properties.
    :param deployment_template: Optional deployment template reference.
    :param accelerator_type: Optional accelerator type.
    :param version_upgrade_option: Optional version upgrade policy.
    :param sku_name: Optional SKU name; a SKU is only attached when this is set.
    :param sku_capacity: Optional SKU capacity, used together with ``sku_name``.
    :param tags: Optional tags to set on the deployment.
    :return: Whatever ``client.begin_create_or_update`` returns.
    """
    from azure.mgmt.cognitiveservices.models import (
        ManagedComputeDeployment,
        ManagedComputeDeploymentProperties,
    )

    resource = ManagedComputeDeployment(
        properties=ManagedComputeDeploymentProperties(
            model=model,
            deployment_template=deployment_template,
            accelerator_type=accelerator_type,
            version_upgrade_option=version_upgrade_option,
        )
    )

    # The SKU is keyed on the name: a capacity given without a name is not sent.
    if sku_name is not None:
        resource.sku = Sku(name=sku_name, capacity=sku_capacity)
    # Leave tags untouched on the model unless the caller supplied some.
    if tags is not None:
        resource.tags = tags

    return client.begin_create_or_update(
        resource_group_name, account_name, deployment_name, resource)


def managed_compute_deployment_update(
        client, resource_group_name, account_name, deployment_name,
        sku_name=None, sku_capacity=None, tags=None):
    """
    Update a managed compute deployment for Azure Cognitive Services account.
    Only SKU (name/capacity) and tags can be updated.

    A ``PatchResourceSku`` body is filled with just the pieces the caller
    supplied and passed to ``client.begin_update``.

    :param client: Operations object exposing ``begin_update``.
    :param resource_group_name: Resource group of the account.
    :param account_name: Cognitive Services account name.
    :param deployment_name: Name of the deployment to update.
    :param sku_name: Optional new SKU name.
    :param sku_capacity: Optional new SKU capacity.
    :param tags: Optional replacement tags.
    :return: Whatever ``client.begin_update`` returns.
    """
    from azure.mgmt.cognitiveservices.models import PatchResourceSku

    patch_body = PatchResourceSku()

    # Unlike create, either SKU field on its own is enough to include a SKU
    # in the patch; the field that was not given is passed through as None.
    sku_requested = not (sku_name is None and sku_capacity is None)
    if sku_requested:
        patch_body.sku = Sku(name=sku_name, capacity=sku_capacity)
    if tags is not None:
        patch_body.tags = tags

    return client.begin_update(
        resource_group_name, account_name, deployment_name, patch_body)


def managed_compute_deployment_show(client, resource_group_name, account_name, deployment_name):
    """
    Show a managed compute deployment for Azure Cognitive Services account.

    :param client: Operations object exposing ``get``.
    :param resource_group_name: Resource group of the account.
    :param account_name: Cognitive Services account name.
    :param deployment_name: Name of the deployment to fetch.
    :return: Whatever ``client.get`` returns for the given identifiers.
    """
    deployment = client.get(resource_group_name, account_name, deployment_name)
    return deployment


def managed_compute_deployment_list(client, resource_group_name, account_name):
    """
    List managed compute deployments for Azure Cognitive Services account.

    :param client: Operations object exposing ``list``.
    :param resource_group_name: Resource group of the account.
    :param account_name: Cognitive Services account name.
    :return: Whatever ``client.list`` returns for the given account.
    """
    deployments = client.list(resource_group_name, account_name)
    return deployments


def managed_compute_deployment_delete(client, resource_group_name, account_name, deployment_name):
    """
    Delete a managed compute deployment from Azure Cognitive Services account.

    :param client: Operations object exposing ``begin_delete``.
    :param resource_group_name: Resource group of the account.
    :param account_name: Cognitive Services account name.
    :param deployment_name: Name of the deployment to delete.
    :return: Whatever ``client.begin_delete`` returns.
    """
    pending_delete = client.begin_delete(resource_group_name, account_name, deployment_name)
    return pending_delete


def commitment_plan_create_or_update(
client,
resource_group_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,23 @@ cognitiveservices agent logs bulk-set:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices agent logs remove:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment create:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment show:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment list:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment update:
rule_exclusions:
- missing_command_test_coverage
cognitiveservices account managed-compute-deployment delete:
rule_exclusions:
- missing_command_test_coverage
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

import unittest

from azure.cli.testsdk import ScenarioTest, ResourceGroupPreparer
from azure.cli.testsdk.decorators import serial_test


class CognitiveServicesManagedComputeDeploymentTests(ScenarioTest):
    """Scenario test for `az cognitiveservices account managed-compute-deployment`."""

    @serial_test()
    @ResourceGroupPreparer()
    def test_cognitiveservices_managed_compute_deployment(self, resource_group):
        """Walk one deployment through list, create, show, list, update, delete, list.

        An AIServices account is created in the prepared resource group, the
        deployment commands are run against it in order, and the account is
        deleted at the end.
        """
        sname = self.create_random_name(prefix='cs_cli_test_', length=16)

        self.kwargs.update({
            'sname': sname,
            'kind': 'AIServices',
            'sku': 'S0',
            'location': 'eastus',
            'deployment_name': 'test-mcd',
            'model': 'azureml://registries/azureml-openai-oss/models/gpt-oss-120b/versions/4',
            'deployment_template': 'azureml://registries/azureml-openai-oss/deploymenttemplates/'
                                   'gpt-oss-120b-short-context/versions/1',
            'accelerator_type': 'H100_80GB',
            'sku_name': 'GlobalManagedCompute',
            'sku_capacity': '1',
        })

        # create cognitive services account
        self.cmd(
            'az cognitiveservices account create -n {sname} -g {rg} '
            '--kind {kind} --sku {sku} -l {location} --yes',
            checks=[
                self.check('name', '{sname}'),
                self.check('properties.provisioningState', 'Succeeded'),
            ])

        # list should be empty initially
        self.cmd(
            'az cognitiveservices account managed-compute-deployment list '
            '-n {sname} -g {rg}',
            checks=[self.check('length(@)', 0)])

        # create managed compute deployment
        self.cmd(
            'az cognitiveservices account managed-compute-deployment create '
            '-n {sname} -g {rg} '
            '--deployment-name {deployment_name} '
            '--model "{model}" '
            '--deployment-template "{deployment_template}" '
            '--accelerator-type {accelerator_type} '
            '--sku-name {sku_name} '
            '--sku-capacity {sku_capacity} '
            '--tags environment=test')

        # show the deployment
        self.cmd(
            'az cognitiveservices account managed-compute-deployment show '
            '-n {sname} -g {rg} '
            '--deployment-name {deployment_name}',
            checks=[
                self.check('name', '{deployment_name}'),
                self.check('properties.model', '{model}'),
                self.check('sku.name', '{sku_name}'),
            ])

        # list should contain the deployment
        self.cmd(
            'az cognitiveservices account managed-compute-deployment list '
            '-n {sname} -g {rg}',
            checks=[self.check('length(@)', 1)])

        # update sku capacity; assert on the command's own output so that an
        # update which silently changes nothing fails the test, without adding
        # another request to the scenario
        self.cmd(
            'az cognitiveservices account managed-compute-deployment update '
            '-n {sname} -g {rg} '
            '--deployment-name {deployment_name} '
            '--sku-capacity 2',
            checks=[self.check('sku.capacity', 2)])

        # delete the deployment
        self.cmd(
            'az cognitiveservices account managed-compute-deployment delete '
            '-n {sname} -g {rg} '
            '--deployment-name {deployment_name}')

        # verify deletion
        self.cmd(
            'az cognitiveservices account managed-compute-deployment list '
            '-n {sname} -g {rg}',
            checks=[self.check('length(@)', 0)])

        # cleanup
        self.cmd('az cognitiveservices account delete -n {sname} -g {rg}')


if __name__ == '__main__':
unittest.main()
Loading