Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions tests_e2e/orchestrator/lib/agent_test_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ class VmImageInfo(object):
locations: Dict[str, List[str]]
# Indicates that the image is available only for those VM sizes. If empty, the image should be available for all VM sizes
vm_sizes: List[str]
# Optional security type (e.g. "ConfidentialVM") to use when deploying this image. When set, the deployment
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, we were depending on LISA to correctly deploy VMs with the 'ConfidentialVM' security type when using our *_cvm image definitions (even though those images can be used for TrustedLaunch or Standard VMs).
Now, we explicitly tell LISA to deploy CVMs when using those images instead of relying on its selection logic.

# is forced to use this security type both for VM (via LISA's Security_Profile requirement) and for VMSS
# (via the 'securityType' parameter in the ARM template). When empty, the default deployment behavior is used.
security_type: str

def __str__(self):
return self.urn
Expand Down Expand Up @@ -373,12 +377,16 @@ def _load_images() -> Dict[str, List[VmImageInfo]]:
i.urn = description
i.locations = {}
i.vm_sizes = []
i.security_type = ""
else:
if "urn" not in description:
raise Exception(f"Image {name} is missing the 'urn' property: {description}")
i.urn = description["urn"]
i.locations = description["locations"] if "locations" in description else {}
i.vm_sizes = description["vm_sizes"] if "vm_sizes" in description else []
i.security_type = description["security_type"] if "security_type" in description else ""
if i.security_type not in ("", "ConfidentialVM"):
raise Exception(f"Invalid security_type {i.security_type} for image {name} in images.yml; expected one of '', 'ConfidentialVM'")
for cloud in i.locations.keys():
if cloud not in ["AzureCloud", "AzureChinaCloud", "AzureUSGovernment"]:
raise Exception(f"Invalid cloud {cloud} for image {name} in images.yml")
Expand Down
19 changes: 18 additions & 1 deletion tests_e2e/orchestrator/lib/agent_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def __init__(self, metadata: TestSuiteMetadata) -> None:
self._location: str # Azure location (region) where test VMs are located
self._image: str # Image used to create the test VMs; it can be empty if LISA chose the size, or when using an existing VM

self._vm_size: str # VM size to use when creating scale sets; empty means use the template default
self._security_type: str # ARM security type (e.g. 'ConfidentialVM') to use when deploying scale sets; empty means template default

self._is_vhd: bool # True when the test VMs were created by LISA from a VHD; this is usually used to validate a new VHD and the test Agent is not installed

# username and public SSH key for the admin account used to connect to the test VMs
Expand Down Expand Up @@ -203,6 +206,8 @@ def _initialize(self, environment: Environment, variables: Dict[str, Any], lisa_
self._subscription_id = variables["subscription_id"]
self._location = variables["c_location"]
self._image = variables["c_image"]
self._vm_size = variables["c_vm_size"]
self._security_type = variables["c_security_type"]

self._is_vhd = variables["c_is_vhd"]

Expand Down Expand Up @@ -903,7 +908,7 @@ def read_file(path: str) -> str:
if self._allow_ssh != '':
network_security_rule.add_allow_ssh_rule(self._allow_ssh)

return template, {
parameters = {
"username": {"value": self._user},
"sshPublicKey": {"value": read_file(f"{self._identity_file}.pub")},
"vmName": {"value": scale_set_name},
Expand All @@ -913,5 +918,17 @@ def read_file(path: str) -> str:
"version": {"value": version}
}

# If the image definition (in images.yml) or the runbook specifies a VM size, use it; otherwise fall back
# to the template default.
if self._vm_size != '':
parameters["vmSize"] = {"value": self._vm_size}

# If the image definition (in images.yml) declares a security type (e.g. 'ConfidentialVM'), set it on the
# scale set; otherwise the template default ('Standard') is used.
if self._security_type != '':
parameters["securityType"] = {"value": self._security_type}

return template, parameters



29 changes: 27 additions & 2 deletions tests_e2e/orchestrator/lib/agent_test_suite_combinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def create_environment_list(self, test_suites: List[str]) -> List[Dict[str, Any]
continue

vm_size = self._get_vm_size(image)
security_type = image.security_type

locations: List[str] = self._get_locations(test_suite_info, image)
if len(locations) == 0:
Expand All @@ -223,6 +224,7 @@ def create_environment_list(self, test_suites: List[str]) -> List[Dict[str, Any]
marketplace_image=marketplace_image,
location=location,
vm_size=vm_size,
security_type=security_type,
test_suite_info=test_suite_info)
else:
env = self.create_vm_environment(
Expand All @@ -232,6 +234,7 @@ def create_environment_list(self, test_suites: List[str]) -> List[Dict[str, Any]
shared_gallery=shared_gallery,
location=location,
vm_size=vm_size,
security_type=security_type,
test_suite_info=test_suite_info)
environments.append(env)
else:
Expand All @@ -247,6 +250,7 @@ def create_environment_list(self, test_suites: List[str]) -> List[Dict[str, Any]
marketplace_image=marketplace_image,
location=location,
vm_size=vm_size,
security_type=security_type,
test_suite_info=test_suite_info)
else:
env = self.create_vm_environment(
Expand All @@ -256,6 +260,7 @@ def create_environment_list(self, test_suites: List[str]) -> List[Dict[str, Any]
shared_gallery=shared_gallery,
location=location,
vm_size=vm_size,
security_type=security_type,
test_suite_info=test_suite_info)
shared_environments[env_name] = env

Expand Down Expand Up @@ -371,7 +376,7 @@ def create_existing_vmss_environment(self, test_suites: List[str]) -> Dict[str,
"c_test_suites": loader.test_suites,
}

def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, shared_gallery: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]:
def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str, shared_gallery: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo, security_type: str = "") -> Dict[str, Any]:
#
# Custom ARM templates (to create the test VMs) require special handling. These templates are processed by the azure_update_arm_template
# hook, which does not have access to the runbook variables. Instead, we use a dummy VM tag named "templates" and pass the
Expand Down Expand Up @@ -435,9 +440,24 @@ def create_vm_environment(self, env_name: str, marketplace_image: str, vhd: str,
}
]
}
elif security_type == "ConfidentialVM":
# On the VM path LISA performs the deployment, so the security type must be expressed as a LISA feature
# requirement on 'c_platform' (LISA does not look at the 'c_security_type' variable, which is consumed
# only by 'AgentTestSuite' on the VMSS path). This forces LISA to deploy the image as a Confidential VM
# regardless of which security profiles the image and VM size happen to support; without it, LISA's
# priority-based selection may pick a non-CVM profile. Note that LISA's SecurityProfileType enum uses
# the lowercase value 'cvm' (which it maps internally to ARM's 'ConfidentialVM').
environment['c_platform'][0]['requirement']["features"] = {
"items": [
{
"type": "Security_Profile",
"security_profile": "cvm"
}
]
}
return environment

def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo) -> Dict[str, Any]:
def create_vmss_environment(self, env_name: str, marketplace_image: str, location: str, vm_size: str, test_suite_info: TestSuiteInfo, security_type: str = "") -> Dict[str, Any]:
return {
"c_platform": [
{
Expand All @@ -461,6 +481,9 @@ def create_vmss_environment(self, env_name: str, marketplace_image: str, locatio
"c_location": location,
"c_image": marketplace_image,
"c_is_vhd": False,
# On the VMSS path the scale set is deployed by 'AgentTestSuite' using our own ARM template
# (vmss.json), bypassing LISA.
"c_security_type": security_type,
"c_vm_size": vm_size,
"vm_tags": {}
}
Expand All @@ -484,6 +507,7 @@ def _get_runbook_images(self, loader: AgentTestLoader) -> List[VmImageInfo]:
i.urn = self.runbook.image # Note that this could be a URN or the URI for a VHD, or an image from a shared gallery
i.locations = []
i.vm_sizes = []
i.security_type = ""

return [i]

Expand All @@ -503,6 +527,7 @@ def _get_test_suite_images(suite: TestSuiteInfo, loader: AgentTestLoader) -> Lis
i.urn = image
i.locations = []
i.vm_sizes = []
i.security_type = ""
image_list = [i]
else:
image_list = loader.images[image]
Expand Down
16 changes: 11 additions & 5 deletions tests_e2e/orchestrator/runbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,8 @@ variable:
- agent_status
- agent_update
- ext_cgroups
#
# TODO: These tests are disabled temporarily since our test account does not have quota to create the Confidential VMs required by the tests.
#
# - ext_policy
# - ext_policy_with_dependencies
- ext_policy
- ext_policy_with_dependencies
- ext_sequencing
- ext_signature_validation
- ext_telemetry_pipeline
Expand Down Expand Up @@ -221,6 +218,15 @@ variable:
value: false
is_case_visible: true

#
# Security type to use when deploying the VM/VMSS resource (e.g. "ConfidentialVM"). Empty means use the default
# deployment behavior. Populated by the AgentTestSuiteCombinator from the 'security_type' property on the
# image in images.yml.
#
- name: c_security_type
value: ""
is_case_visible: true

environment: $(c_environment)

platform: $(c_platform)
Expand Down
18 changes: 16 additions & 2 deletions tests_e2e/orchestrator/templates/vmss.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@
},
"version": {
"type": "string"
},
"vmSize": {
"type": "string",
"defaultValue": "Standard_D2s_v3"
},
"securityType": {
"type": "string",
"defaultValue": "Standard",
"allowedValues": [
"Standard",
"ConfidentialVM"
]
}
},
"variables": {
Expand Down Expand Up @@ -167,7 +179,7 @@
"[concat('Microsoft.Network/loadBalancers/', variables('lbName'))]"
],
"sku": {
"name": "Standard_D2s_v3",
"name": "[parameters('vmSize')]",
"tier": "Standard",
"capacity": 3
},
Expand Down Expand Up @@ -199,7 +211,8 @@
"createOption": "FromImage",
"caching": "ReadWrite",
"managedDisk": {
"storageAccountType": "Premium_LRS"
"storageAccountType": "Premium_LRS",
"securityProfile": "[if(equals(parameters('securityType'), 'ConfidentialVM'), createObject('securityEncryptionType', 'DiskWithVMGuestState'), json('null'))]"
},
"diskSizeGB": 64
},
Expand All @@ -210,6 +223,7 @@
"version": "[parameters('version')]"
}
},
"securityProfile": "[if(equals(parameters('securityType'), 'ConfidentialVM'), createObject('securityType', 'ConfidentialVM', 'uefiSettings', createObject('secureBootEnabled', true(), 'vTpmEnabled', true())), json('null'))]",
"diagnosticsProfile": {
"bootDiagnostics": {
"enabled": true
Expand Down
10 changes: 4 additions & 6 deletions tests_e2e/test_suites/ext_policy.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#
# The test suite verifies that disallowed extensions are not processed, but the agent should still report status.
#
# TODO: This test suite takes ~30 minutes to run. This should be optimized to reduce impact to pipeline run times.
name: "ExtensionPolicy"
tests:
- "ext_policy/ext_policy.py"
images:
- "endorsed"
- "random(endorsed,10)" # TODO: Remove randomization and run on all endorsed images once the test suite is optimized to reduce runtime.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made the decision to limit this scenario to 10 random endorsed images per day, in addition to all 6 cvm-endorsed images. That gives us coverage across 16 images per daily run.

I made this change because each run of this suite takes ~30 minutes (primarily because the delete scenario waits on a 15-minute CRP timeout). If this test ran on all 30 endorsed images, most of the 32 available LISA runners would be stuck on the environments with this scenario, preventing us from processing other environments and causing pipeline timeouts.

I've added a TODO to optimize this test, but in the meantime, I think it's appropriate to only run on 10 of the endorsed images and all of the cvm-endorsed images. If we want more coverage, we can extend the pipeline timeout instead as a temporary measure until the test is optimized.

- "cvm-endorsed"
# This test is executed in southcentralus as a workaround for recurring fabric "ServiceUnavailableFault" issues observed in westus2.
locations: "AzureCloud:southcentralus"
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously this ran on southcentralus when we only ran on 'endorsed' images, but the CVM sku we're using is not available in southcentralus.

Instead of updating the location for the entire test suite to westeurope (where the CVM sku is available), I just updated the CVM image definitions in images.yml to list the regions it is available in.

Now, this suite will run in the default location (westus2) for the 'endorsed' images and in westeurope for the 'cvm-endorsed' images (since that is the first region listed in the images.yml definitions for those CVM images).

This reduces the number of environments created per test run.

# TODO: This test is currently failing on usgov cloud due to an issue with the GuestConfig extension. Re-enable once the extension fix has been rolled out.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked and I'm no longer seeing that issue in USGov; the test is passing there.

There are now GuestConfig extension failures in China cloud, but only on the debian_11 image, so I am skipping only that image.

skip_on_clouds:
- "AzureUSGovernment"
owns_vm: false
skip_on_images:
- "AzureChinaCloud:debian_11" # The ConfigurationforLinux-1.26.109 extension is failing on Debian 11 in China cloud only; skip this image until the issue in the extension is fixed
8 changes: 4 additions & 4 deletions tests_e2e/test_suites/ext_policy_with_dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
name: "ExtPolicyWithDependencies"
tests:
- "ext_policy/ext_policy_with_dependencies.py"
images: "endorsed"
images:
- "endorsed"
- "cvm-endorsed"
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After reading this comment, I confirmed that the CVM VMSS are being created with the default sku (Standard_D2s_v3) instead of the CVM sku from the images.yml (Standard_DC2ads_v5):
image
I didn't notice because there weren't any deployment failures, but it turns out they were not being created as CVMs:
2026-05-13T17:59:21.601406Z INFO ExtHandler ExtHandler This is not a confidential virtual machine.

Good catch by Copilot :) I'll fix it

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

executes_on_scale_set: true
owns_vm: false
# This test is executed in southcentralus as a workaround for recurring fabric "ServiceUnavailableFault" issues observed in westus2.
locations: "AzureCloud:southcentralus"
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same reasoning for removing the location here as the ext_policy scenario


# TODO: Currently AlmaLinux is not available for scale sets; enable this image when it is available.
skip_on_images:
Expand All @@ -18,4 +18,4 @@ skip_on_images:

# TODO: The current deployment of VmAccess 1.5.22 prevents the extension from uninstalling; enable this test when the issue is fixed
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nora confirmed that v1.5.24 of VmAccess, which contains the fix for this, will reach USGov regions by the end of this week, so we should be able to remove this soon.

skip_on_clouds:
- "AzureUSGovernment"
- "AzureUSGovernment"
1 change: 0 additions & 1 deletion tests_e2e/test_suites/ext_signature_validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ tests:
# Extension signature is sent by CRP only for CVMs, so this test suite should run exclusively on CVMs.
images: "cvm-endorsed"
# Extension signatures are currently only available in the public cloud, so we skip this test on other clouds.
locations: "AzureCloud:westeurope"
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We specified the location for this suite because the CVM SKU we're using is only available in certain regions. I updated the CVM image definitions in images.yml with the locations where they can be used, so this is no longer necessary.

skip_on_clouds:
- "AzureChinaCloud"
- "AzureUSGovernment"
Expand Down
Loading
Loading