Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/libs/kata-types/src/annotations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ use crate::config::TomlConfig;
use crate::initdata::add_hypervisor_initdata_overrides;
use crate::sl;

use self::cri_containerd::{SANDBOX_CPU_PERIOD_KEY, SANDBOX_CPU_QUOTA_KEY, SANDBOX_MEM_KEY};
use self::cri_containerd::{
SANDBOX_CPU_PERIOD_KEY, SANDBOX_CPU_QUOTA_KEY, SANDBOX_CPU_SHARE_KEY, SANDBOX_MEM_KEY,
};

/// CRI-containerd specific annotations.
pub mod cri_containerd;
Expand Down Expand Up @@ -443,6 +445,14 @@ impl Annotation {
value.unwrap_or(0)
}

/// Get the annotation of cpu shares for sandbox
pub fn get_sandbox_cpu_shares(&self) -> u64 {
let value = self
.get_value::<u64>(SANDBOX_CPU_SHARE_KEY)
.unwrap_or(Some(0));
value.unwrap_or(0)
}

/// Get the annotation of memory for sandbox
pub fn get_sandbox_mem(&self) -> i64 {
let value = self.get_value::<i64>(SANDBOX_MEM_KEY).unwrap_or(Some(0));
Expand Down
18 changes: 18 additions & 0 deletions src/runtime-rs/crates/resource/src/cpu_mem/cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,24 @@ impl CpuResource {
return Ok(cpuset_vcpu.len() as f32);
}

// Fallback to CPU shares when quota and cpuset are both absent.
// This handles pods with no resource limits but with CPU shares set.
// Mirrors the Go runtime's CalculateCPUsF() fallback logic.
if total_quota == 0.0 && cpuset_vcpu.is_empty() {
let total_shares: u64 = resources
.values()
.map(|cpu_resource| cpu_resource.shares())
.sum();
if total_shares > 0 {
let shares_vcpu = total_shares as f32 / 1024.0;
info!(
sl!(),
"(from cpu_shares) get vcpus # {}", shares_vcpu
);
return Ok(shares_vcpu.max(1.0));
}
}

// When quota is set: calculate vCPUs as quota/period after normalization
if total_quota > 0.0 && max_period > 0.0 {
let quota_vcpu = total_quota / max_period;
Expand Down
77 changes: 77 additions & 0 deletions src/runtime-rs/crates/resource/src/cpu_mem/initial_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ impl TryFrom<&HashMap<String, String>> for InitialSize {
let mut vcpu: f32 = 0.0;

let annotation = Annotation::new(an.clone());
let shares = annotation.get_sandbox_cpu_shares();
let (period, quota, memory) =
get_sizing_info(annotation).context("failed to get sizing info")?;
let mut cpu = oci::LinuxCpu::default();
Expand All @@ -42,6 +43,16 @@ impl TryFrom<&HashMap<String, String>> for InitialSize {
if let Ok(cpu_resource) = LinuxContainerCpuResources::try_from(&cpu) {
vcpu = get_nr_vcpu(&cpu_resource);
}

// When neither quota/period nor cpuset constrain the CPU, fall back to
// the sandbox CPU-shares annotation (mirrors Go's CalculateSandboxSizing
// calling CalculateCPUsF(quota, period, shares)).
if vcpu == 0.0 {
if shares > 0 {
vcpu = shares as f32 / 1024.0;
}
}

let mem_mb = convert_memory_to_mb(memory);

Ok(Self {
Expand Down Expand Up @@ -142,6 +153,7 @@ impl InitialSizeManager {

if self.resource.vcpu > 0.0 {
info!(sl!(), "resource with vcpu {}", self.resource.vcpu);
hv.cpu_info.default_vcpus = self.resource.vcpu;
}
self.resource.orig_toml_default_mem = hv.memory_info.default_memory;
if self.resource.mem_mb > 0 {
Expand Down Expand Up @@ -259,6 +271,71 @@ mod tests {
.to_vec()
}

// Test that sandbox CPU-shares annotation is used as vcpu fallback when
// quota and period are absent. Mirrors Go's CalculateCPUsF(0, 0, shares).
#[test]
fn test_initial_size_sandbox_cpu_shares_fallback() {
let cases: &[(&str, u64, f32)] = &[
("1024 shares = 1.0 vcpu", 1024, 1.0),
("2048 shares = 2.0 vcpu", 2048, 2.0),
("512 shares = 0.5 vcpu", 512, 0.5),
("0 shares = 0.0 vcpu (no fallback)", 0, 0.0),
];

for (desc, shares, expected_vcpu) in cases {
let annotations = HashMap::from([
(
cri_containerd::CONTAINER_TYPE_LABEL_KEY.to_string(),
cri_containerd::SANDBOX.to_string(),
),
(
cri_containerd::SANDBOX_CPU_SHARE_KEY.to_string(),
format!("{}", shares),
),
]);

let initial_size = InitialSize::try_from(&annotations).unwrap();
assert_eq!(
initial_size.vcpu, *expected_vcpu,
"{}: got vcpu={}, expected {}",
desc, initial_size.vcpu, expected_vcpu
);
}
}

// Verify quota/period take precedence over shares when both are present.
#[test]
fn test_initial_size_sandbox_quota_wins_over_shares() {
let annotations = HashMap::from([
(
cri_containerd::CONTAINER_TYPE_LABEL_KEY.to_string(),
cri_containerd::SANDBOX.to_string(),
),
(
cri_containerd::SANDBOX_CPU_PERIOD_KEY.to_string(),
"100000".to_string(),
),
(
cri_containerd::SANDBOX_CPU_QUOTA_KEY.to_string(),
"220000".to_string(),
),
// shares set too — quota should win
(
cri_containerd::SANDBOX_CPU_SHARE_KEY.to_string(),
"4096".to_string(), // would be 4.0 if shares were used
),
]);

let initial_size = InitialSize::try_from(&annotations).unwrap();
// 220_000/100_000 = 2.2 → ceil = 3.0, not 4.0 from shares
assert_eq!(
initial_size.vcpu.ceil(),
3.0,
"quota should take precedence over shares, got vcpu={}",
initial_size.vcpu
);
}

#[test]
fn test_initial_size_sandbox() {
let tests = get_test_data();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ pub struct VethEndpointState {
pub network_qos: bool,
}

#[derive(Serialize, Deserialize, Clone, Default)]
pub struct NetkitEndpointState {
pub if_name: String,
pub network_qos: bool,
}

#[derive(Serialize, Deserialize, Clone, Default)]
pub struct IpVlanEndpointState {
pub if_name: String,
Expand All @@ -54,6 +60,7 @@ pub struct VhostUserEndpointState {
pub struct EndpointState {
pub physical_endpoint: Option<PhysicalEndpointState>,
pub veth_endpoint: Option<VethEndpointState>,
pub netkit_endpoint: Option<NetkitEndpointState>,
pub ipvlan_endpoint: Option<IpVlanEndpointState>,
pub macvlan_endpoint: Option<MacvlanEndpointState>,
pub vlan_endpoint: Option<VlanEndpointState>,
Expand Down
2 changes: 2 additions & 0 deletions src/runtime-rs/crates/resource/src/network/endpoint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ mod physical_endpoint;
pub use physical_endpoint::PhysicalEndpoint;
mod veth_endpoint;
pub use veth_endpoint::VethEndpoint;
mod netkit_endpoint;
pub use netkit_endpoint::NetkitEndpoint;
mod ipvlan_endpoint;
pub use ipvlan_endpoint::IPVlanEndpoint;
mod vlan_endpoint;
Expand Down
115 changes: 115 additions & 0 deletions src/runtime-rs/crates/resource/src/network/endpoint/netkit_endpoint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//

use std::io::{self, Error};
use std::sync::Arc;

use anyhow::{Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::{do_handle_device, DeviceManager};
use hypervisor::device::driver::NetworkConfig;
use hypervisor::device::{DeviceConfig, DeviceType};
use hypervisor::{Hypervisor, NetworkDevice};
use tokio::sync::RwLock;

use super::endpoint_persist::{EndpointState, NetkitEndpointState};
use super::Endpoint;
use crate::network::{utils, NetworkPair};

#[derive(Debug)]
pub struct NetkitEndpoint {
pub(crate) net_pair: NetworkPair,
pub(crate) d: Arc<RwLock<DeviceManager>>,
}

impl NetkitEndpoint {
pub async fn new(
d: &Arc<RwLock<DeviceManager>>,
handle: &rtnetlink::Handle,
name: &str,
idx: u32,
model: &str,
queues: usize,
) -> Result<Self> {
let net_pair = NetworkPair::new(handle, idx, name, model, queues)
.await
.context("new network interface pair failed.")?;

Ok(NetkitEndpoint {
net_pair,
d: d.clone(),
})
}

fn get_network_config(&self) -> Result<NetworkConfig> {
let iface = &self.net_pair.tap.tap_iface;
let guest_mac = utils::parse_mac(&iface.hard_addr).ok_or_else(|| {
Error::new(
io::ErrorKind::InvalidData,
format!("hard_addr {}", &iface.hard_addr),
)
})?;

Ok(NetworkConfig {
host_dev_name: iface.name.clone(),
virt_iface_name: self.net_pair.virt_iface.name.clone(),
guest_mac: Some(guest_mac),
..Default::default()
})
}
}

#[async_trait]
impl Endpoint for NetkitEndpoint {
async fn name(&self) -> String {
self.net_pair.virt_iface.name.clone()
}

async fn hardware_addr(&self) -> String {
self.net_pair.tap.tap_iface.hard_addr.clone()
}

async fn attach(&self) -> Result<()> {
self.net_pair
.add_network_model()
.await
.context("add network model")?;

let config = self.get_network_config().context("get network config")?;
do_handle_device(&self.d, &DeviceConfig::NetworkCfg(config))
.await
.context("do handle network netkit endpoint device failed.")?;

Ok(())
}

async fn detach(&self, h: &dyn Hypervisor) -> Result<()> {
self.net_pair
.del_network_model()
.await
.context("del network model failed.")?;

let config = self.get_network_config().context("get network config")?;
h.remove_device(DeviceType::Network(NetworkDevice {
config,
..Default::default()
}))
.await
.context("remove netkit endpoint device by hypervisor failed.")?;

Ok(())
}

async fn save(&self) -> Option<EndpointState> {
Some(EndpointState {
netkit_endpoint: Some(NetkitEndpointState {
if_name: self.net_pair.virt_iface.name.clone(),
network_qos: self.net_pair.network_qos,
}),
..Default::default()
})
}
}
23 changes: 22 additions & 1 deletion src/runtime-rs/crates/resource/src/network/network_with_netns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ use tokio::sync::RwLock;

use super::{
endpoint::{
Endpoint, IPVlanEndpoint, MacVlanEndpoint, PhysicalEndpoint, VethEndpoint, VlanEndpoint,
Endpoint, IPVlanEndpoint, MacVlanEndpoint, NetkitEndpoint, PhysicalEndpoint, VethEndpoint,
VlanEndpoint,
},
network_entity::NetworkEntity,
network_info::network_info_from_link::{handle_addresses, NetworkInfoFromLink},
Expand Down Expand Up @@ -278,6 +279,26 @@ async fn create_endpoint(
.context("macvlan endpoint")?;
Arc::new(ret)
}
"netkit" => {
// L3 mode netkit devices have no MAC address and are not supported.
if attrs.hardware_addr.is_empty() {
return Err(anyhow!(
"netkit device {} has no MAC address (L3 mode not supported - use L2 mode or veth)",
attrs.name
));
}
let ret = NetkitEndpoint::new(
&d,
handle,
&attrs.name,
idx,
&config.network_model,
config.queues,
)
.await
.context("netkit endpoint")?;
Arc::new(ret)
}
_ => return Err(anyhow!("unsupported link type: {}", link_type)),
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ fn link_info(mut infos: Vec<LinkInfo>) -> Box<dyn Link> {
link = Some(Box::new(Bridge::default()));
}
}
InfoKind::Netkit => {
if link.is_none() {
link = Some(Box::new(Netkit::default()));
}
}
_ => {
if link.is_none() {
link = Some(Box::new(Device::default()));
Expand All @@ -145,6 +150,9 @@ fn link_info(mut infos: Vec<LinkInfo>) -> Box<dyn Link> {
InfoData::Bridge(ibs) => {
link = Some(Box::new(parse_bridge(ibs)));
}
InfoData::Netkit(_) => {
link = Some(Box::new(Netkit::default()));
}
_ => {
link = Some(Box::new(Device::default()));
}
Expand Down Expand Up @@ -214,6 +222,7 @@ macro_rules! define_and_impl_network_dev {
define_and_impl_network_dev!("device", Device);
define_and_impl_network_dev!("tuntap", Tuntap);
define_and_impl_network_dev!("veth", Veth);
define_and_impl_network_dev!("netkit", Netkit);
define_and_impl_network_dev!("ipvlan", IpVlan);
define_and_impl_network_dev!("macvlan", MacVlan);
define_and_impl_network_dev!("vlan", Vlan);
Expand Down
Loading
Loading