Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions libs/net/vmspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Any, Final

from kubernetes.dynamic.client import ResourceField
from ocp_resources.utils.resource_constants import ResourceConstants
Comment thread
azhivovk marked this conversation as resolved.
Comment thread
azhivovk marked this conversation as resolved.
from ocp_resources.virtual_machine import VirtualMachine
from timeout_sampler import retry

Expand Down Expand Up @@ -31,6 +32,14 @@ class IpNotFound(Exception):
pass


class VMIConditionNotReachedError(Exception):
pass


class VMIConditionStillPresentError(Exception):
pass


def _default_interface_predicate(interface: ResourceField) -> bool:
return "guest-agent" in interface["infoSource"] and interface[IP_ADDRESS]

Expand Down Expand Up @@ -194,3 +203,99 @@ def wait_for_ifaces_status(
)
),
)


def wait_for_vmi_condition_status(
vm: BaseVirtualMachine,
condition: str,
status: str = ResourceConstants.Condition.Status.TRUE,
timeout: int = 300,
resource_version: str | None = None,
) -> None:
"""Wait for a VMI status condition using the Kubernetes Watch API.

Args:
vm: The virtual machine to watch.
condition: Condition type to wait for (e.g. "MigrationRequired").
status: Expected condition status ("True" or "False"). Defaults to "True".
timeout: Maximum seconds to wait.
resource_version: VMI resource version captured before the triggering action.
Ensures no condition transitions are missed between the action and this call.

Raises:
VMIConditionNotReachedError: If the condition is not reached within timeout.
"""
vmi = vm.vmi
vmi_instance = vmi.instance
existing_conditions = vmi_instance.status.conditions
if existing_conditions and _vmi_condition_set(
existing_conditions=existing_conditions, required_condition=condition, status=status
):
return

watch_from_resource_version = resource_version or vmi_instance.metadata.resourceVersion
for event in vmi.watcher(timeout=timeout, resource_version=watch_from_resource_version):
if event["type"] != "MODIFIED":
continue
existing_conditions = event["object"].status.conditions
if existing_conditions and _vmi_condition_set(
existing_conditions=existing_conditions, required_condition=condition, status=status
):
return

raise VMIConditionNotReachedError(
f"VMI {vm.name}: condition {condition}={status} was not reached within {timeout}s."
)


def wait_for_no_vmi_condition(
vm: BaseVirtualMachine,
condition: str,
timeout: int = 300,
resource_version: str | None = None,
) -> None:
"""Wait until a VMI status condition is absent or False.

Succeeds when the condition is either removed from the array or set to False.
Comment thread
azhivovk marked this conversation as resolved.

Args:
vm: The virtual machine to watch.
condition: Condition type to wait on (e.g. "MigrationRequired").
timeout: Maximum seconds to wait.
resource_version: VMI resource version captured before the triggering action.
Ensures no condition transitions are missed between the action and this call.

Raises:
VMIConditionStillPresentError: If the condition is still True or present after timeout.
"""
vmi = vm.vmi
Comment thread
azhivovk marked this conversation as resolved.
vmi_instance = vmi.instance
existing_conditions = vmi_instance.status.conditions
if existing_conditions and _vmi_condition_not_set(
existing_conditions=existing_conditions, required_condition=condition
):
return

watch_from_resource_version = resource_version or vmi_instance.metadata.resourceVersion
for event in vmi.watcher(timeout=timeout, resource_version=watch_from_resource_version):
if event["type"] != "MODIFIED":
continue
existing_conditions = event["object"].status.conditions
if existing_conditions and _vmi_condition_not_set(
existing_conditions=existing_conditions, required_condition=condition
):
return

raise VMIConditionStillPresentError(f"VMI {vm.name}: condition {condition} is still present after {timeout}s.")


def _vmi_condition_set(existing_conditions: list[ResourceField], required_condition: str, status: str) -> bool:
return any(cond.type == required_condition and cond.status == status for cond in existing_conditions)


def _vmi_condition_not_set(existing_conditions: list[ResourceField], required_condition: str) -> bool:
return all(
cond.status == ResourceConstants.Condition.Status.FALSE
for cond in existing_conditions
if cond.type == required_condition
)
14 changes: 14 additions & 0 deletions libs/vm/vm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
Devices,
Disk,
Metadata,
Network,
Comment thread
azhivovk marked this conversation as resolved.
SpecDisk,
VMISpec,
VMSpec,
Expand Down Expand Up @@ -115,6 +116,19 @@ def update_template_annotations(self, template_annotations: dict[str, str]) -> N
}
ResourceEditor(patches=patches).update()

def set_networks(self, networks: list[Network]) -> None:
Comment thread
azhivovk marked this conversation as resolved.
"""Replace all secondary networks in the VM spec with a single atomic patch.
Comment thread
azhivovk marked this conversation as resolved.

Updates the in-memory spec first so the object stays consistent with the cluster
without requiring a re-fetch after the patch.

Args:
networks: Full list of Network entries to apply (including the pod network).
"""
self._spec.template.spec.networks = networks
serialized = [asdict(obj=net, dict_factory=self._filter_out_none_values) for net in networks]
ResourceEditor(patches={self: {"spec": {"template": {"spec": {"networks": serialized}}}}}).update()

def set_template_affinity(self, affinity: Affinity | None) -> None:
"""Replace the VM template affinity.

Expand Down
3 changes: 3 additions & 0 deletions tests/network/l2_bridge/libl2bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@

RHCOS9_WORKER_LABEL: Final[str] = f"{NODE_ROLE_KUBERNETES_IO}/worker-rhcos9"

LINUX_BRIDGE_IFACE_NAME_1: Final[str] = "linux-bridge-1"
LINUX_BRIDGE_IFACE_NAME_2: Final[str] = "linux-bridge-2"


NETWORK_MANAGER_UNMANAGE_RUNCMD = [
'sudo echo -e "[main]\nno-auto-default=*\nignore-carrier=*" > /etc/NetworkManager/conf.d/no-nm-ownership.conf',
Expand Down
84 changes: 84 additions & 0 deletions tests/network/l2_bridge/nad_ref_change/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from collections.abc import Generator
Comment thread
azhivovk marked this conversation as resolved.

import pytest
from kubernetes.dynamic import DynamicClient
from ocp_resources.namespace import Namespace

from libs.net.ip import random_cidr_addresses_by_family
from libs.net.netattachdef import CNIPluginBridgeConfig, NetConfig, NetworkAttachmentDefinition
from libs.net.vmspec import wait_for_ifaces_status
from libs.vm.vm import BaseVirtualMachine
from tests.network.l2_bridge.libl2bridge import LINUX_BRIDGE_IFACE_NAME_1, LINUX_BRIDGE_IFACE_NAME_2
from tests.network.l2_bridge.nad_ref_change.lib_helpers import (
NET_SEED,
two_secondary_bridge_vm,
)
from tests.network.libs import nodenetworkconfigurationpolicy as libnncp
from tests.network.libs.connectivity import ARP_ISOLATION_SYSCTL_CMD


@pytest.fixture(scope="module")
def bridge_nad_a(
admin_client: DynamicClient,
namespace: Namespace,
bridge_nncp: libnncp.NodeNetworkConfigurationPolicy,
vlan_index_number: Generator[int],
) -> Generator[NetworkAttachmentDefinition]:
bridge = bridge_nncp.desired_state_spec.interfaces[0].name # type: ignore
Comment thread
azhivovk marked this conversation as resolved.
with NetworkAttachmentDefinition(
name="nad-vlan-a",
namespace=namespace.name,
config=NetConfig(
name="nad-vlan-a", plugins=[CNIPluginBridgeConfig(bridge=bridge, vlan=next(vlan_index_number))]
),
client=admin_client,
) as nad:
yield nad


@pytest.fixture(scope="module")
def bridge_nad_b(
admin_client: DynamicClient,
namespace: Namespace,
bridge_nncp: libnncp.NodeNetworkConfigurationPolicy,
vlan_index_number: Generator[int],
) -> Generator[NetworkAttachmentDefinition]:
bridge = bridge_nncp.desired_state_spec.interfaces[0].name # type: ignore[union-attr, index]
with NetworkAttachmentDefinition(
name="nad-vlan-b",
namespace=namespace.name,
config=NetConfig(
name="nad-vlan-b", plugins=[CNIPluginBridgeConfig(bridge=bridge, vlan=next(vlan_index_number))]
),
client=admin_client,
) as nad:
yield nad


@pytest.fixture(scope="class")
def under_test_vm_two_ifaces(
namespace: Namespace,
unprivileged_client: DynamicClient,
bridge_nad_a: NetworkAttachmentDefinition,
) -> Generator[BaseVirtualMachine]:
iface_a_ips = random_cidr_addresses_by_family(net_seed=NET_SEED, host_address=3)
iface_b_ips = random_cidr_addresses_by_family(net_seed=NET_SEED, host_address=4)
with two_secondary_bridge_vm(
namespace=namespace.name,
name="under-test-vm-two-ifaces",
client=unprivileged_client,
nad_names=[bridge_nad_a.name, bridge_nad_a.name],
ip_addresses=[iface_a_ips, iface_b_ips],
iface_names=[LINUX_BRIDGE_IFACE_NAME_1, LINUX_BRIDGE_IFACE_NAME_2],
runcmd=ARP_ISOLATION_SYSCTL_CMD,
) as vm:
vm.start(wait=True)
vm.wait_for_agent_connected()
wait_for_ifaces_status(
vm=vm,
ip_addresses_by_spec_net_name={
LINUX_BRIDGE_IFACE_NAME_1: [addr.split("/")[0] for addr in iface_a_ips],
LINUX_BRIDGE_IFACE_NAME_2: [addr.split("/")[0] for addr in iface_b_ips],
},
)
yield vm
Comment thread
azhivovk marked this conversation as resolved.
99 changes: 99 additions & 0 deletions tests/network/l2_bridge/nad_ref_change/lib_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from copy import deepcopy
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Comment thread
azhivovk marked this conversation as resolved.
from typing import Final

from kubernetes.dynamic import DynamicClient

from libs.net.vmspec import wait_for_no_vmi_condition, wait_for_vmi_condition_status
from libs.vm.factory import base_vmspec, fedora_vm
from libs.vm.spec import (
CloudInitNoCloud,
Devices,
Interface,
Multus,
Network,
)
from libs.vm.vm import BaseVirtualMachine, add_volume_disk, cloudinitdisk_storage
from tests.network.libs import cloudinit
from tests.network.libs.cloudinit import primary_iface_cloud_init

NET_SEED: Final[int] = 0


GUEST_IFACE_1: Final[str] = "eth1"
GUEST_IFACE_2: Final[str] = "eth2"
Comment thread
azhivovk marked this conversation as resolved.


def update_nad_references(vm: BaseVirtualMachine, nad_name_by_net: dict[str, str]) -> None:
Comment thread
azhivovk marked this conversation as resolved.
"""Update secondary network NAD references and wait for the change to be fully applied.

Patches the VM spec atomically, then waits for the MigrationRequired condition to
appear (change detected) and disappear (migration completed).

Args:
vm: The virtual machine to update.
nad_name_by_net: Mapping of interface name to new NAD name.
"""
resource_version = vm.vmi.instance.metadata.resourceVersion
networks = deepcopy(vm.template_spec.networks) or []
for network in networks:
if network.name in nad_name_by_net and network.multus:
network.multus.networkName = nad_name_by_net[network.name]
vm.set_networks(networks=networks)
Comment thread
azhivovk marked this conversation as resolved.
wait_for_vmi_condition_status(vm=vm, condition="MigrationRequired", resource_version=resource_version)
wait_for_no_vmi_condition(vm=vm, condition="MigrationRequired")


def two_secondary_bridge_vm(
Comment thread
azhivovk marked this conversation as resolved.
namespace: str,
name: str,
client: DynamicClient,
nad_names: list[str],
ip_addresses: list[list[str]],
iface_names: list[str],
runcmd: list[str] | None = None,
) -> BaseVirtualMachine:
"""Create a Fedora VM with a masquerade primary interface and bridge-bound secondary interfaces.

Interface layout in guest OS:
eth0 = masquerade (pod network, primary — handles default route and IPv6)
eth1 = first secondary bridge interface
eth2 = second secondary bridge interface (if present)

Args:
namespace: Namespace to deploy the VM in.
name: VM name.
client: Kubernetes dynamic client.
nad_names: NAD names (multus networkName) for the secondary interfaces, in spec order.
ip_addresses: Per-interface CIDR address lists, aligned with nad_names.
Comment thread
azhivovk marked this conversation as resolved.
Each inner list contains one address per supported IP family.
iface_names: Logical interface names for the VM spec, aligned with nad_names.
runcmd: Commands to run on first boot via cloud-init runcmd. None means no extra commands.
"""
spec = base_vmspec()
spec.template.spec.domain.devices = Devices(
interfaces=[
Interface(name="default", masquerade={}),
*[Interface(name=iface_name, bridge={}) for iface_name in iface_names],
]
)
spec.template.spec.networks = [
Network(name="default", pod={}),
*[
Network(name=iface_name, multus=Multus(networkName=nad_name))
for iface_name, nad_name in zip(iface_names, nad_names)
Comment thread
azhivovk marked this conversation as resolved.
],
Comment thread
azhivovk marked this conversation as resolved.
]
ethernets = {}
if primary := primary_iface_cloud_init():
ethernets["eth0"] = primary
for i, addresses in enumerate(ip_addresses):
ethernets[f"eth{i + 1}"] = cloudinit.EthernetDevice(addresses=addresses)
Comment thread
azhivovk marked this conversation as resolved.
userdata = cloudinit.UserData(users=[], runcmd=runcmd)
disk, volume = cloudinitdisk_storage(
data=CloudInitNoCloud(
networkData=cloudinit.asyaml(no_cloud=cloudinit.NetworkData(ethernets=ethernets)) if ethernets else "",
userData=cloudinit.format_cloud_config(userdata=userdata),
)
)
spec.template.spec = add_volume_disk(vmi_spec=spec.template.spec, volume=volume, disk=disk)
return fedora_vm(namespace=namespace, name=name, client=client, spec=spec)
Loading