Skip to content

Commit 81a15e1

Browse files
authored
{Cognitive Services} az cognitiveservices agent: ABAC-enabled ACR registry support (#32863)
1 parent ec64eb2 commit 81a15e1

1 file changed

Lines changed: 168 additions & 57 deletions

File tree

  • src/azure-cli/azure/cli/command_modules/cognitiveservices

src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py

Lines changed: 168 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,18 @@ def commitment_plan_create_or_update(
415415

416416
AGENT_API_VERSION_PARAMS = {"api-version": "2025-11-15-preview"}
417417

418+
# Roles that grant pull access to ACR. Used by _check_project_acr_access.
419+
_ACR_PULL_ROLES = {
420+
'AcrPull',
421+
'AcrPush',
422+
'Container Registry Repository Reader',
423+
'Container Registry Repository Writer',
424+
'Container Registry Repository Contributor',
425+
'Reader',
426+
'Contributor',
427+
'Owner',
428+
}
429+
418430

419431
def _validate_image_tag(image_uri):
420432
"""
@@ -772,11 +784,22 @@ def _build_image_remotely(cmd, source_dir, image_name, # pylint: disable=too-ma
772784
# Use ACR module client factories and utility functions for build operations.
773785
# These private APIs are pinned to specific preview API versions and handle complex
774786
# operations like source upload, task scheduling, and log streaming.
775-
from azure.cli.command_modules.acr._client_factory import cf_acr_registries_tasks, cf_acr_runs
776-
from azure.cli.command_modules.acr._stream_utils import stream_logs
777-
from azure.cli.command_modules.acr._utils import prepare_source_location, get_resource_group_name_by_registry_name
778787
import base64
779788

789+
from azure.cli.command_modules.acr._client_factory import (
790+
cf_acr_registries, cf_acr_registries_tasks, cf_acr_runs,
791+
)
792+
from azure.cli.command_modules.acr._stream_utils import stream_logs
793+
from azure.cli.command_modules.acr._utils import (
794+
prepare_source_location,
795+
get_resource_group_name_by_registry_name,
796+
)
797+
from azure.mgmt.containerregistry.models import (
798+
Credentials as AcrCredentials,
799+
RoleAssignmentMode,
800+
SourceRegistryCredentials,
801+
)
802+
780803
logger.warning("Building image remotely using ACR Task: %s", image_name)
781804

782805
# Get ACR clients - these use preview API versions with build task support
@@ -787,7 +810,34 @@ def _build_image_remotely(cmd, source_dir, image_name, # pylint: disable=too-ma
787810
resource_group_name = get_resource_group_name_by_registry_name(
788811
cmd.cli_ctx, registry_name)
789812

813+
# For ABAC-enabled registries, one-off build runs (schedule_run) require
814+
# SourceRegistryCredentials with identity='[caller]' so the ACR task
815+
# authenticates as the signed-in CLI user for push. This matches
816+
# `az acr build --source-acr-auth-id [caller]` behavior.
817+
# For non-ABAC registries (or if ABAC mode cannot be determined), no
818+
# explicit credentials are needed.
819+
build_credentials = None
820+
790821
try:
822+
try:
823+
registry = cf_acr_registries(cmd.cli_ctx).get(
824+
resource_group_name, registry_name)
825+
registry_abac_enabled = (
826+
getattr(registry, 'role_assignment_mode', None) ==
827+
RoleAssignmentMode.ABAC_REPOSITORY_PERMISSIONS
828+
)
829+
if registry_abac_enabled:
830+
build_credentials = AcrCredentials(
831+
source_registry=SourceRegistryCredentials(identity='[caller]')
832+
)
833+
except Exception as registry_lookup_error: # pylint: disable=broad-except
834+
logger.debug(
835+
"Unable to detect ACR ABAC mode for '%s': %s. "
836+
"Continuing without explicit source registry credentials.",
837+
registry_name,
838+
registry_lookup_error,
839+
)
840+
791841
# Extract just the image name and tag (without registry)
792842
if '/' in image_name:
793843
image_without_registry = image_name.split('/', 1)[1]
@@ -817,7 +867,8 @@ def _build_image_remotely(cmd, source_dir, image_name, # pylint: disable=too-ma
817867
source_location=source_location,
818868
platform=PlatformProperties(os='Linux', architecture='amd64'),
819869
docker_file_path=dockerfile_name,
820-
timeout=3600
870+
timeout=3600,
871+
credentials=build_credentials,
821872
)
822873

823874
queued = client_registries.schedule_run(
@@ -843,7 +894,8 @@ def _build_image_remotely(cmd, source_dir, image_name, # pylint: disable=too-ma
843894
encoded_task_content=base64.b64encode(yaml_body.encode()).decode(),
844895
source_location=source_location,
845896
timeout=3600,
846-
platform=PlatformProperties(os='Linux', architecture='amd64')
897+
platform=PlatformProperties(os='Linux', architecture='amd64'),
898+
credentials=build_credentials,
847899
)
848900

849901
queued = client_registries.schedule_run(
@@ -1729,6 +1781,10 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
17291781
"""
17301782
Check if AI Foundry project's managed identity has AcrPull access to container registry.
17311783
1784+
When ABAC is enabled on the registry and assignments have conditions, this function
1785+
treats them as granting access (warn-but-don't-block) because ABAC condition strings
1786+
are complex and cannot be reliably evaluated client-side.
1787+
17321788
Args:
17331789
cmd: CLI command context
17341790
client: Service client
@@ -1737,13 +1793,14 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
17371793
registry_name: ACR registry name (without .azurecr.io)
17381794
17391795
Returns:
1740-
tuple: (has_access: bool, principal_id: str, error_message: str)
1796+
tuple: (has_access: bool, principal_id: str, error_message: str, abac_enabled: bool)
17411797
17421798
Limitations:
17431799
- Only validates well-known role names (AcrPull, AcrPush, Reader, Contributor, Owner, etc.)
17441800
- Custom roles with pull permissions may not be detected
17451801
- Inherited permissions from parent scopes (resource group, subscription) are not checked
17461802
- Only validates direct role assignments on the ACR resource
1803+
- ABAC conditions are not evaluated; a warning is logged instead
17471804
"""
17481805
from azure.cli.core.commands.client_factory import get_subscription_id
17491806
from azure.cli.command_modules.role.custom import list_role_assignments
@@ -1752,12 +1809,9 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
17521809
# Get resource group from account name
17531810
resource_group_name = _get_resource_group_by_account_name(cmd, account_name)
17541811

1755-
# Get project to find its managed identity
1812+
# Get project to find its managed identity (project-level identity, not account-level)
17561813
from azure.cli.command_modules.cognitiveservices._client_factory import cf_projects
1757-
projects_client = cf_projects(cmd.cli_ctx)
1758-
1759-
# Get project resource (project-level identity, not account-level)
1760-
project = projects_client.get(
1814+
project = cf_projects(cmd.cli_ctx).get(
17611815
resource_group_name=resource_group_name,
17621816
account_name=account_name,
17631817
project_name=project_name
@@ -1767,20 +1821,33 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
17671821
if not project.identity or not project.identity.principal_id:
17681822
return (False, None,
17691823
f"Project '{project_name}' does not have a system-assigned managed identity enabled. "
1770-
f"A project identity is automatically created when the project is created.")
1824+
f"A project identity is automatically created when the project is created.",
1825+
False)
17711826

17721827
principal_id = project.identity.principal_id
17731828

1774-
# Get ACR resource ID
1829+
# Get ACR resource ID and check ABAC mode
1830+
from azure.cli.command_modules.acr._client_factory import cf_acr_registries
17751831
from azure.cli.command_modules.acr._utils import get_resource_group_name_by_registry_name
1776-
subscription_id = get_subscription_id(cmd.cli_ctx)
1832+
from azure.mgmt.containerregistry.models import RoleAssignmentMode
17771833
acr_resource_group = get_resource_group_name_by_registry_name(
17781834
cmd.cli_ctx, registry_name)
17791835
acr_resource_id = (
1780-
f"/subscriptions/{subscription_id}/resourceGroups/{acr_resource_group}/"
1836+
f"/subscriptions/{get_subscription_id(cmd.cli_ctx)}/resourceGroups/{acr_resource_group}/"
17811837
f"providers/Microsoft.ContainerRegistry/registries/{registry_name}"
17821838
)
17831839

1840+
# Detect whether ABAC is enabled on the registry
1841+
try:
1842+
acr_registry = cf_acr_registries(cmd.cli_ctx).get(acr_resource_group, registry_name)
1843+
abac_enabled = (
1844+
getattr(acr_registry, 'role_assignment_mode', None) ==
1845+
RoleAssignmentMode.ABAC_REPOSITORY_PERMISSIONS
1846+
)
1847+
except Exception: # pylint: disable=broad-except
1848+
abac_enabled = False
1849+
logger.debug("Could not determine ACR ABAC mode, assuming standard RBAC")
1850+
17841851
# Check role assignments for AcrPull or higher permissions
17851852
#
17861853
# KNOWN LIMITATION: This checks for well-known role names rather than checking
@@ -1796,45 +1863,45 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
17961863
# However, this is significantly more complex and slower. The current approach
17971864
# follows the pattern used by AKS (see acs/_roleassignments.py) and covers
17981865
# the most common scenarios. Users with custom roles can use --skip-acr-check.
1799-
#
1800-
# Acceptable roles include:
1801-
# Standard ACR roles:
1802-
# - AcrPull: Can pull images
1803-
# - AcrPush: Can pull and push images
1804-
# Repository-scoped roles:
1805-
# - Container Registry Repository Reader: Read access (includes pull)
1806-
# - Container Registry Repository Writer: Read/write access (includes pull)
1807-
# - Container Registry Repository Contributor: Full repository access (includes pull)
1808-
# General Azure roles:
1809-
# - Reader: Can view resources (includes pull)
1810-
# - Contributor, Owner: Full access
1811-
acceptable_roles = [
1812-
'AcrPull',
1813-
'AcrPush',
1814-
'Container Registry Repository Reader',
1815-
'Container Registry Repository Writer',
1816-
'Container Registry Repository Contributor',
1817-
'Reader',
1818-
'Contributor',
1819-
'Owner'
1820-
]
18211866

18221867
# Get role assignments for the principal on the ACR
18231868
assignments = list_role_assignments(cmd, assignee=principal_id, scope=acr_resource_id)
18241869

1825-
# Check if any assignment has acceptable role
1870+
# Check if any assignment has an acceptable role (see _ACR_PULL_ROLES),
1871+
# accounting for ABAC conditions
18261872
for assignment in assignments:
18271873
role_name = assignment.get('roleDefinitionName', '')
1828-
if role_name in acceptable_roles:
1874+
if role_name in _ACR_PULL_ROLES:
1875+
condition = assignment.get('condition', None)
1876+
if condition and abac_enabled:
1877+
# ABAC is enabled and this assignment has a condition.
1878+
# We cannot reliably evaluate ABAC condition strings client-side,
1879+
# so we treat the assignment as valid and warn the user.
1880+
logger.warning(
1881+
"Found '%s' role on ACR '%s' with an ABAC condition. "
1882+
"Cannot verify whether the condition grants access to the "
1883+
"target repository. If the deployment fails with a permission "
1884+
"error, verify the ABAC condition covers the required repository.",
1885+
role_name, registry_name)
1886+
return (True, principal_id, None, abac_enabled)
1887+
# No condition = full scope access (or ABAC not enabled)
18291888
logger.info(
18301889
"Found %s role for project identity on ACR %s",
18311890
role_name, registry_name)
1832-
return (True, principal_id, None)
1891+
return (True, principal_id, None, abac_enabled)
18331892

18341893
# No suitable role found
1894+
if abac_enabled:
1895+
return (
1896+
False, principal_id,
1897+
f"Project managed identity does not have any recognized pull role on "
1898+
f"ABAC-enabled registry '{registry_name}'",
1899+
abac_enabled
1900+
)
18351901
return (
18361902
False, principal_id,
1837-
f"Project managed identity does not have AcrPull access to '{registry_name}'"
1903+
f"Project managed identity does not have AcrPull access to '{registry_name}'",
1904+
abac_enabled
18381905
)
18391906

18401907
except Exception as e: # pylint: disable=broad-except
@@ -1844,7 +1911,7 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
18441911
"use --skip-acr-check to bypass this validation."
18451912
)
18461913
logger.error("ACR access check failed: %s", str(e))
1847-
return (False, None, error_msg)
1914+
return (False, None, error_msg, False)
18481915

18491916

18501917
def _validate_agent_create_parameters(image, source, build_remote, no_start, min_replicas, max_replicas):
@@ -1888,6 +1955,26 @@ def _validate_agent_create_parameters(image, source, build_remote, no_start, min
18881955
_validate_scaling_options(no_start, min_replicas, max_replicas)
18891956

18901957

1958+
def _extract_repository_name_for_acr(image, source, agent_name, registry):
1959+
"""Extract ACR repository path (without tag/digest) for ABAC guidance."""
1960+
if source:
1961+
return agent_name
1962+
1963+
if not image:
1964+
return None
1965+
1966+
if '.azurecr.io/' in image:
1967+
repository = image.split('.azurecr.io/', 1)[1]
1968+
elif registry:
1969+
repository = image
1970+
else:
1971+
return None
1972+
1973+
repository = repository.split('@', 1)[0]
1974+
repository = repository.split(':', 1)[0]
1975+
return repository or None
1976+
1977+
18911978
def agent_create( # pylint: disable=too-many-locals
18921979
cmd,
18931980
client,
@@ -1952,10 +2039,12 @@ def agent_create( # pylint: disable=too-many-locals
19522039

19532040
registry_name = _determine_registry_for_access_check(image, registry, source)
19542041

2042+
image_repo = _extract_repository_name_for_acr(image, source, agent_name, registry)
2043+
19552044
if registry_name and not skip_acr_check:
19562045
logger.info("Checking if project has access to ACR %s...", registry_name)
19572046

1958-
has_access, principal_id, error_msg = _check_project_acr_access(
2047+
has_access, principal_id, error_msg, acr_abac_enabled = _check_project_acr_access(
19592048
cmd, client, account_name, project_name, registry_name
19602049
)
19612050

@@ -1973,21 +2062,43 @@ def agent_create( # pylint: disable=too-many-locals
19732062
except Exception: # pylint: disable=broad-except
19742063
acr_rg = '<acr-resource-group>'
19752064

1976-
error_message = (
1977-
f"{error_msg}\n\n"
1978-
f"AI Foundry needs permission to pull the container image from ACR.\n"
1979-
f"Grant AcrPull role to the project's managed identity:\n\n"
1980-
f" az role assignment create --assignee {principal_id} "
1981-
f"--role AcrPull "
1982-
f"--scope /subscriptions/{subscription_id}/resourceGroups/{acr_rg}/"
1983-
f"providers/Microsoft.ContainerRegistry/registries/{registry_name}\n\n"
1984-
f"Or use Azure Portal:\n"
1985-
f" 1. Open ACR '{registry_name}' → Access Control (IAM)\n"
1986-
f" 2. Add role assignment → AcrPull\n"
1987-
f" 3. Assign access to: Managed Identity\n"
1988-
f" 4. Select the project's managed identity\n\n"
1989-
f"To skip this check (not recommended), use: --skip-acr-check"
2065+
acr_scope = (
2066+
f"/subscriptions/{subscription_id}/resourceGroups/{acr_rg}/"
2067+
f"providers/Microsoft.ContainerRegistry/registries/{registry_name}"
19902068
)
2069+
2070+
if acr_abac_enabled and image_repo:
2071+
error_message = (
2072+
f"{error_msg}\n\n"
2073+
f"This registry has ABAC (repository-level permissions) enabled.\n"
2074+
f"Grant repository-scoped access to the project's managed identity:\n\n"
2075+
f" az role assignment create --assignee {principal_id} "
2076+
f"--role \"Container Registry Repository Reader\" "
2077+
f"--scope {acr_scope} "
2078+
f"--condition \"@Resource[Microsoft.ContainerRegistry/registries/"
2079+
f"repositories] StringEquals '{image_repo}'\" "
2080+
f"--condition-version \"2.0\"\n\n"
2081+
f"Or grant broad access (bypasses ABAC scoping):\n\n"
2082+
f" az role assignment create --assignee {principal_id} "
2083+
f"--role AcrPull "
2084+
f"--scope {acr_scope}\n\n"
2085+
f"To skip this check (not recommended), use: --skip-acr-check"
2086+
)
2087+
else:
2088+
error_message = (
2089+
f"{error_msg}\n\n"
2090+
f"AI Foundry needs permission to pull the container image from ACR.\n"
2091+
f"Grant AcrPull role to the project's managed identity:\n\n"
2092+
f" az role assignment create --assignee {principal_id} "
2093+
f"--role AcrPull "
2094+
f"--scope {acr_scope}\n\n"
2095+
f"Or use Azure Portal:\n"
2096+
f" 1. Open ACR '{registry_name}' → Access Control (IAM)\n"
2097+
f" 2. Add role assignment → AcrPull\n"
2098+
f" 3. Assign access to: Managed Identity\n"
2099+
f" 4. Select the project's managed identity\n\n"
2100+
f"To skip this check (not recommended), use: --skip-acr-check"
2101+
)
19912102
raise ValidationError(error_message)
19922103

19932104
image_uri = _resolve_agent_image_uri(

0 commit comments

Comments
 (0)