Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 168 additions & 57 deletions src/azure-cli/azure/cli/command_modules/cognitiveservices/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,18 @@ def commitment_plan_create_or_update(

AGENT_API_VERSION_PARAMS = {"api-version": "2025-11-15-preview"}

# Roles that grant pull access to ACR. Used by _check_project_acr_access.
_ACR_PULL_ROLES = {
'AcrPull',
'AcrPush',
'Container Registry Repository Reader',
'Container Registry Repository Writer',
'Container Registry Repository Contributor',
'Reader',
'Contributor',
'Owner',
}


def _validate_image_tag(image_uri):
"""
Expand Down Expand Up @@ -772,11 +784,22 @@ def _build_image_remotely(cmd, source_dir, image_name, # pylint: disable=too-ma
# Use ACR module client factories and utility functions for build operations.
# These private APIs are pinned to specific preview API versions and handle complex
# operations like source upload, task scheduling, and log streaming.
from azure.cli.command_modules.acr._client_factory import cf_acr_registries_tasks, cf_acr_runs
from azure.cli.command_modules.acr._stream_utils import stream_logs
from azure.cli.command_modules.acr._utils import prepare_source_location, get_resource_group_name_by_registry_name
import base64

from azure.cli.command_modules.acr._client_factory import (
cf_acr_registries, cf_acr_registries_tasks, cf_acr_runs,
)
from azure.cli.command_modules.acr._stream_utils import stream_logs
from azure.cli.command_modules.acr._utils import (
prepare_source_location,
get_resource_group_name_by_registry_name,
)
from azure.mgmt.containerregistry.models import (
Credentials as AcrCredentials,
RoleAssignmentMode,
SourceRegistryCredentials,
)

logger.warning("Building image remotely using ACR Task: %s", image_name)

# Get ACR clients - these use preview API versions with build task support
Expand All @@ -787,7 +810,34 @@ def _build_image_remotely(cmd, source_dir, image_name, # pylint: disable=too-ma
resource_group_name = get_resource_group_name_by_registry_name(
cmd.cli_ctx, registry_name)

# For ABAC-enabled registries, one-off build runs (schedule_run) require
# SourceRegistryCredentials with identity='[caller]' so the ACR task
# authenticates as the signed-in CLI user for push. This matches
# `az acr build --source-acr-auth-id [caller]` behavior.
# For non-ABAC registries (or if ABAC mode cannot be determined), no
# explicit credentials are needed.
build_credentials = None

try:
try:
registry = cf_acr_registries(cmd.cli_ctx).get(
resource_group_name, registry_name)
registry_abac_enabled = (
getattr(registry, 'role_assignment_mode', None) ==
RoleAssignmentMode.ABAC_REPOSITORY_PERMISSIONS
)
if registry_abac_enabled:
build_credentials = AcrCredentials(
source_registry=SourceRegistryCredentials(identity='[caller]')
)
except Exception as registry_lookup_error: # pylint: disable=broad-except
logger.debug(
"Unable to detect ACR ABAC mode for '%s': %s. "
"Continuing without explicit source registry credentials.",
registry_name,
registry_lookup_error,
)

# Extract just the image name and tag (without registry)
if '/' in image_name:
image_without_registry = image_name.split('/', 1)[1]
Expand Down Expand Up @@ -817,7 +867,8 @@ def _build_image_remotely(cmd, source_dir, image_name, # pylint: disable=too-ma
source_location=source_location,
platform=PlatformProperties(os='Linux', architecture='amd64'),
docker_file_path=dockerfile_name,
timeout=3600
timeout=3600,
credentials=build_credentials,
)

queued = client_registries.schedule_run(
Expand All @@ -843,7 +894,8 @@ def _build_image_remotely(cmd, source_dir, image_name, # pylint: disable=too-ma
encoded_task_content=base64.b64encode(yaml_body.encode()).decode(),
source_location=source_location,
timeout=3600,
platform=PlatformProperties(os='Linux', architecture='amd64')
platform=PlatformProperties(os='Linux', architecture='amd64'),
credentials=build_credentials,
)

queued = client_registries.schedule_run(
Expand Down Expand Up @@ -1729,6 +1781,10 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
"""
Check if AI Foundry project's managed identity has AcrPull access to container registry.

When ABAC is enabled on the registry and assignments have conditions, this function
treats them as granting access (warn-but-don't-block) because ABAC condition strings
are complex and cannot be reliably evaluated client-side.

Args:
cmd: CLI command context
client: Service client
Expand All @@ -1737,13 +1793,14 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
registry_name: ACR registry name (without .azurecr.io)

Returns:
tuple: (has_access: bool, principal_id: str, error_message: str)
tuple: (has_access: bool, principal_id: str, error_message: str, abac_enabled: bool)

Limitations:
- Only validates well-known role names (AcrPull, AcrPush, Reader, Contributor, Owner, etc.)
- Custom roles with pull permissions may not be detected
- Inherited permissions from parent scopes (resource group, subscription) are not checked
- Only validates direct role assignments on the ACR resource
- ABAC conditions are not evaluated; a warning is logged instead
"""
from azure.cli.core.commands.client_factory import get_subscription_id
from azure.cli.command_modules.role.custom import list_role_assignments
Expand All @@ -1752,12 +1809,9 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
# Get resource group from account name
resource_group_name = _get_resource_group_by_account_name(cmd, account_name)

# Get project to find its managed identity
# Get project to find its managed identity (project-level identity, not account-level)
from azure.cli.command_modules.cognitiveservices._client_factory import cf_projects
projects_client = cf_projects(cmd.cli_ctx)

# Get project resource (project-level identity, not account-level)
project = projects_client.get(
project = cf_projects(cmd.cli_ctx).get(
resource_group_name=resource_group_name,
account_name=account_name,
project_name=project_name
Expand All @@ -1767,20 +1821,33 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
if not project.identity or not project.identity.principal_id:
return (False, None,
f"Project '{project_name}' does not have a system-assigned managed identity enabled. "
f"A project identity is automatically created when the project is created.")
f"A project identity is automatically created when the project is created.",
False)

principal_id = project.identity.principal_id

# Get ACR resource ID
# Get ACR resource ID and check ABAC mode
from azure.cli.command_modules.acr._client_factory import cf_acr_registries
from azure.cli.command_modules.acr._utils import get_resource_group_name_by_registry_name
subscription_id = get_subscription_id(cmd.cli_ctx)
from azure.mgmt.containerregistry.models import RoleAssignmentMode
acr_resource_group = get_resource_group_name_by_registry_name(
cmd.cli_ctx, registry_name)
acr_resource_id = (
f"/subscriptions/{subscription_id}/resourceGroups/{acr_resource_group}/"
f"/subscriptions/{get_subscription_id(cmd.cli_ctx)}/resourceGroups/{acr_resource_group}/"
f"providers/Microsoft.ContainerRegistry/registries/{registry_name}"
)

# Detect whether ABAC is enabled on the registry
try:
acr_registry = cf_acr_registries(cmd.cli_ctx).get(acr_resource_group, registry_name)
abac_enabled = (
getattr(acr_registry, 'role_assignment_mode', None) ==
RoleAssignmentMode.ABAC_REPOSITORY_PERMISSIONS
)
except Exception: # pylint: disable=broad-except
abac_enabled = False
logger.debug("Could not determine ACR ABAC mode, assuming standard RBAC")

# Check role assignments for AcrPull or higher permissions
#
# KNOWN LIMITATION: This checks for well-known role names rather than checking
Expand All @@ -1796,45 +1863,45 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
# However, this is significantly more complex and slower. The current approach
# follows the pattern used by AKS (see acs/_roleassignments.py) and covers
# the most common scenarios. Users with custom roles can use --skip-acr-check.
#
# Acceptable roles include:
# Standard ACR roles:
# - AcrPull: Can pull images
# - AcrPush: Can pull and push images
# Repository-scoped roles:
# - Container Registry Repository Reader: Read access (includes pull)
# - Container Registry Repository Writer: Read/write access (includes pull)
# - Container Registry Repository Contributor: Full repository access (includes pull)
# General Azure roles:
# - Reader: Can view resources (includes pull)
# - Contributor, Owner: Full access
acceptable_roles = [
'AcrPull',
'AcrPush',
'Container Registry Repository Reader',
'Container Registry Repository Writer',
'Container Registry Repository Contributor',
'Reader',
'Contributor',
'Owner'
]

# Get role assignments for the principal on the ACR
assignments = list_role_assignments(cmd, assignee=principal_id, scope=acr_resource_id)

# Check if any assignment has acceptable role
# Check if any assignment has an acceptable role (see _ACR_PULL_ROLES),
# accounting for ABAC conditions
for assignment in assignments:
role_name = assignment.get('roleDefinitionName', '')
if role_name in acceptable_roles:
if role_name in _ACR_PULL_ROLES:
condition = assignment.get('condition', None)
if condition and abac_enabled:
# ABAC is enabled and this assignment has a condition.
# We cannot reliably evaluate ABAC condition strings client-side,
# so we treat the assignment as valid and warn the user.
logger.warning(
"Found '%s' role on ACR '%s' with an ABAC condition. "
"Cannot verify whether the condition grants access to the "
"target repository. If the deployment fails with a permission "
"error, verify the ABAC condition covers the required repository.",
role_name, registry_name)
return (True, principal_id, None, abac_enabled)
# No condition = full scope access (or ABAC not enabled)
logger.info(
"Found %s role for project identity on ACR %s",
role_name, registry_name)
return (True, principal_id, None)
return (True, principal_id, None, abac_enabled)
Comment on lines +1875 to +1891
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for handling conditioned role assignments needs refinement. If a role assignment has a condition but abac_enabled is False (either because ABAC detection failed or the registry doesn't have ABAC enabled), the code falls through to line 1888-1891 and returns True, treating the conditioned assignment as fully valid. This could incorrectly accept conditioned assignments that may not grant the required access. Consider adding an else-if branch: if condition exists but abac_enabled is False, either log a warning or treat it as a failure, since conditioned assignments typically indicate repository-scoped permissions that should be validated.

Copilot uses AI. Check for mistakes.

# No suitable role found
if abac_enabled:
return (
False, principal_id,
f"Project managed identity does not have any recognized pull role on "
f"ABAC-enabled registry '{registry_name}'",
abac_enabled
)
return (
False, principal_id,
f"Project managed identity does not have AcrPull access to '{registry_name}'"
f"Project managed identity does not have AcrPull access to '{registry_name}'",
abac_enabled
)
Comment on lines 1840 to 1905
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ABAC handling logic in _check_project_acr_access lacks test coverage. Given the complexity of the warn-but-don't-block pattern for conditioned assignments (lines 1876-1886) and the ABAC-specific error messages (lines 1894-1900), consider adding unit tests or integration tests to verify: (1) correct handling when ABAC is enabled with conditioned assignments, (2) correct handling when ABAC is disabled but conditions exist, and (3) correct error messages for ABAC vs non-ABAC registries. Mock the ACR registry fetch and role assignments to test different scenarios.

Copilot uses AI. Check for mistakes.

except Exception as e: # pylint: disable=broad-except
Expand All @@ -1844,7 +1911,7 @@ def _check_project_acr_access(cmd, client, account_name, project_name, registry_
"use --skip-acr-check to bypass this validation."
)
logger.error("ACR access check failed: %s", str(e))
return (False, None, error_msg)
return (False, None, error_msg, False)


def _validate_agent_create_parameters(image, source, build_remote, no_start, min_replicas, max_replicas):
Expand Down Expand Up @@ -1888,6 +1955,26 @@ def _validate_agent_create_parameters(image, source, build_remote, no_start, min
_validate_scaling_options(no_start, min_replicas, max_replicas)


def _extract_repository_name_for_acr(image, source, agent_name, registry):
"""Extract ACR repository path (without tag/digest) for ABAC guidance."""
if source:
return agent_name

if not image:
return None

if '.azurecr.io/' in image:
repository = image.split('.azurecr.io/', 1)[1]
elif registry:
repository = image
else:
return None

repository = repository.split('@', 1)[0]
repository = repository.split(':', 1)[0]
return repository or None

Comment on lines +1958 to +1976
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new helper function _extract_repository_name_for_acr lacks unit tests. The test file shows a pattern of unit testing for similar helper functions (e.g., test_has_dockerfile_*, test_is_fully_qualified_image). Consider adding unit tests covering edge cases: source with agent_name, image with .azurecr.io/, image with registry param, digest format (@sha256:...), tag format (:v1), and combinations thereof.

Copilot uses AI. Check for mistakes.

def agent_create( # pylint: disable=too-many-locals
cmd,
client,
Expand Down Expand Up @@ -1952,10 +2039,12 @@ def agent_create( # pylint: disable=too-many-locals

registry_name = _determine_registry_for_access_check(image, registry, source)

image_repo = _extract_repository_name_for_acr(image, source, agent_name, registry)

if registry_name and not skip_acr_check:
logger.info("Checking if project has access to ACR %s...", registry_name)

has_access, principal_id, error_msg = _check_project_acr_access(
has_access, principal_id, error_msg, acr_abac_enabled = _check_project_acr_access(
cmd, client, account_name, project_name, registry_name
)

Expand All @@ -1973,21 +2062,43 @@ def agent_create( # pylint: disable=too-many-locals
except Exception: # pylint: disable=broad-except
acr_rg = '<acr-resource-group>'

error_message = (
f"{error_msg}\n\n"
f"AI Foundry needs permission to pull the container image from ACR.\n"
f"Grant AcrPull role to the project's managed identity:\n\n"
f" az role assignment create --assignee {principal_id} "
f"--role AcrPull "
f"--scope /subscriptions/{subscription_id}/resourceGroups/{acr_rg}/"
f"providers/Microsoft.ContainerRegistry/registries/{registry_name}\n\n"
f"Or use Azure Portal:\n"
f" 1. Open ACR '{registry_name}' → Access Control (IAM)\n"
f" 2. Add role assignment → AcrPull\n"
f" 3. Assign access to: Managed Identity\n"
f" 4. Select the project's managed identity\n\n"
f"To skip this check (not recommended), use: --skip-acr-check"
acr_scope = (
f"/subscriptions/{subscription_id}/resourceGroups/{acr_rg}/"
f"providers/Microsoft.ContainerRegistry/registries/{registry_name}"
)

if acr_abac_enabled and image_repo:
error_message = (
f"{error_msg}\n\n"
f"This registry has ABAC (repository-level permissions) enabled.\n"
f"Grant repository-scoped access to the project's managed identity:\n\n"
f" az role assignment create --assignee {principal_id} "
f"--role \"Container Registry Repository Reader\" "
f"--scope {acr_scope} "
f"--condition \"@Resource[Microsoft.ContainerRegistry/registries/"
f"repositories] StringEquals '{image_repo}'\" "
f"--condition-version \"2.0\"\n\n"
f"Or grant broad access (bypasses ABAC scoping):\n\n"
f" az role assignment create --assignee {principal_id} "
f"--role AcrPull "
f"--scope {acr_scope}\n\n"
f"To skip this check (not recommended), use: --skip-acr-check"
)
else:
error_message = (
f"{error_msg}\n\n"
f"AI Foundry needs permission to pull the container image from ACR.\n"
f"Grant AcrPull role to the project's managed identity:\n\n"
f" az role assignment create --assignee {principal_id} "
f"--role AcrPull "
f"--scope {acr_scope}\n\n"
f"Or use Azure Portal:\n"
f" 1. Open ACR '{registry_name}' → Access Control (IAM)\n"
f" 2. Add role assignment → AcrPull\n"
f" 3. Assign access to: Managed Identity\n"
f" 4. Select the project's managed identity\n\n"
f"To skip this check (not recommended), use: --skip-acr-check"
)
Comment on lines +2070 to +2101
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When ABAC is enabled but image_repo is None (line 2070 condition fails), the error message falls through to the else block (line 2087-2101) and provides generic AcrPull guidance that doesn't mention ABAC. This could mislead users with ABAC-enabled registries. Consider adding a third case: if acr_abac_enabled is True but image_repo is None, provide an ABAC-aware message without the repository-specific condition command, or explain why the repository name couldn't be determined.

Copilot uses AI. Check for mistakes.
raise ValidationError(error_message)

image_uri = _resolve_agent_image_uri(
Expand Down
Loading