Skip to content

Commit 7f4966d

Browse files
authored
bug: pull pod infra container image if not cached for network isolated cluster (#7155)
1 parent 0c7b9fb commit 7f4966d

104 files changed

Lines changed: 437 additions & 223 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

e2e/node_config.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ var baseKubeletConfig = &aksnodeconfigv1.KubeletConfig{
2323
"--cloud-config": "",
2424
"--cloud-provider": "external",
2525
"--kubeconfig": "/var/lib/kubelet/kubeconfig",
26-
"--pod-infra-container-image": "mcr.microsoft.com/oss/kubernetes/pause:3.6",
26+
"--pod-infra-container-image": "mcr.microsoft.com/oss/v2/kubernetes/pause:3.6",
2727
},
2828
KubeletNodeLabels: map[string]string{
2929
"agentpool": "nodepool2",
@@ -547,7 +547,7 @@ func baseTemplateLinux(t testing.TB, location string, k8sVersion string, arch st
547547
OSImageConfig: map[datamodel.Distro]datamodel.AzureOSImageConfig(nil),
548548
},
549549
K8sComponents: &datamodel.K8sComponents{
550-
PodInfraContainerImageURL: "mcr.microsoft.com/oss/kubernetes/pause:3.6",
550+
PodInfraContainerImageURL: "mcr.microsoft.com/oss/v2/kubernetes/pause:3.6",
551551
HyperkubeImageURL: "mcr.microsoft.com/oss/kubernetes/",
552552
WindowsPackageURL: "windowspackage",
553553
LinuxCredentialProviderURL: "",
@@ -681,7 +681,7 @@ func baseTemplateLinux(t testing.TB, location string, k8sVersion string, arch st
681681
"--max-pods": "110",
682682
"--network-plugin": "kubenet",
683683
"--node-status-update-frequency": "10s",
684-
"--pod-infra-container-image": "mcr.microsoft.com/oss/kubernetes/pause:3.6",
684+
"--pod-infra-container-image": "mcr.microsoft.com/oss/v2/kubernetes/pause:3.6",
685685
"--pod-manifest-path": "/etc/kubernetes/manifests",
686686
"--pod-max-pids": "-1",
687687
"--protect-kernel-defaults": "true",

e2e/scenario_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ func Test_AzureLinuxV2_AirGap(t *testing.T) {
123123
ContainerRegistryServer: fmt.Sprintf("%s.azurecr.io", config.PrivateACRName(config.Config.DefaultLocation)),
124124
},
125125
}
126+
nbc.KubeletConfig["--pod-infra-container-image"] = "mcr.microsoft.com/oss/v2/kubernetes/pause:3.6"
126127
},
127128
Validator: func(ctx context.Context, s *Scenario) {
128129
ValidateDirectoryContent(ctx, s, "/run", []string{"outbound-check-skipped"})
@@ -768,6 +769,7 @@ func Test_Ubuntu2204_AirGap_NonAnonymousACR(t *testing.T) {
768769
nbc.ContainerService.Properties.OrchestratorProfile.OrchestratorVersion)
769770
nbc.KubeletConfig["--image-credential-provider-config"] = "/var/lib/kubelet/credential-provider-config.yaml"
770771
nbc.KubeletConfig["--image-credential-provider-bin-dir"] = "/var/lib/kubelet/credential-provider"
772+
nbc.KubeletConfig["--pod-infra-container-image"] = "mcr.microsoft.com/oss/v2/kubernetes/pause:3.6"
771773
},
772774
Validator: func(ctx context.Context, s *Scenario) {
773775
ValidateDirectoryContent(ctx, s, "/run", []string{"outbound-check-skipped"})
@@ -842,6 +844,7 @@ func Test_Ubuntu2204Gen2_ContainerdAirgappedNonAnonymousK8sNotCached(t *testing.
842844
nbc.ContainerService.Properties.OrchestratorProfile.OrchestratorVersion)
843845
nbc.KubeletConfig["--image-credential-provider-config"] = "/var/lib/kubelet/credential-provider-config.yaml"
844846
nbc.KubeletConfig["--image-credential-provider-bin-dir"] = "/var/lib/kubelet/credential-provider"
847+
nbc.KubeletConfig["--pod-infra-container-image"] = "mcr.microsoft.com/oss/v2/kubernetes/pause:3.6"
845848
},
846849
Validator: func(ctx context.Context, s *Scenario) {
847850
ValidateDirectoryContent(ctx, s, "/run", []string{"outbound-check-skipped"})

parts/linux/cloud-init/artifacts/cse_config.sh

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,44 @@ configureKubeletAndKubectl() {
576576
fi
577577
}
578578

579+
# ensurePodInfraContainerImage makes sure the pod infra (pause/sandbox) image
# reported by get_sandbox_image is present in containerd's k8s.io namespace.
#
# If the image is already listed by ctr, nothing is pulled. Otherwise the
# image reference is rewritten so that "mcr.microsoft.com" is replaced with
# BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER, copied into a local OCI layout
# with oras, imported into containerd, re-tagged under its original name and
# labelled as pinned.
#
# Reads:   BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER, ERR_PULL_POD_INFRA_CONTAINER_IMAGE
# Calls:   get_sandbox_image, retrycmd_cp_oci_layout_with_oras, labelContainerImage
# Returns: 0 when the image is cached or was imported successfully.
# Exits:   with ERR_PULL_POD_INFRA_CONTAINER_IMAGE when the image name cannot be
#          determined, or when the pull or the import fails.
ensurePodInfraContainerImage() {
    POD_INFRA_CONTAINER_IMAGE_DOWNLOAD_DIR="/opt/pod-infra-container-image/downloads"
    POD_INFRA_CONTAINER_IMAGE_TAR="/opt/pod-infra-container-image/pod-infra-container-image.tar"

    pod_infra_container_image=$(get_sandbox_image)
    if [ -z "$pod_infra_container_image" ]; then
        # Fail fast instead of retrying a pull of an empty reference.
        echo "Unable to determine the pod infra container image"
        exit $ERR_PULL_POD_INFRA_CONTAINER_IMAGE
    fi

    echo "Checking if $pod_infra_container_image already exists locally..."
    # -F -x: compare the whole line as a fixed string, so the dots in the image
    # name are not interpreted as regex wildcards.
    if ctr -n k8s.io images list -q | grep -Fxq -- "${pod_infra_container_image}"; then
        echo "Image $pod_infra_container_image already exists locally, skipping pull"
        echo "Cached image details:"
        ctr -n k8s.io images list | grep -F -- "${pod_infra_container_image}" || true
        return 0
    fi

    # Derive the repository name: drop an "@digest" suffix first, then drop a
    # ":tag" suffix only when the colon belongs to the last path component, so
    # that a registry port (host:5000/repo) is left intact.
    base_name="${pod_infra_container_image%%@*}"
    case "${base_name##*/}" in
        *:*) base_name="${base_name%:*}" ;;
    esac
    tag="local"

    image="${pod_infra_container_image//mcr.microsoft.com/${BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER}}"
    # NOTE(review): acr_url is not used inside this function; kept in case other
    # code reads it, since variables here are not declared local - confirm.
    acr_url=$(echo "$image" | cut -d/ -f1)

    mkdir -p "${POD_INFRA_CONTAINER_IMAGE_DOWNLOAD_DIR}"

    echo "Pulling with authentication for $image"
    if ! retrycmd_cp_oci_layout_with_oras 10 5 "${POD_INFRA_CONTAINER_IMAGE_DOWNLOAD_DIR}" "$tag" "$image"; then
        rm -rf "${POD_INFRA_CONTAINER_IMAGE_DOWNLOAD_DIR}"
        exit $ERR_PULL_POD_INFRA_CONTAINER_IMAGE
    fi

    tar -cvf "${POD_INFRA_CONTAINER_IMAGE_TAR}" -C "${POD_INFRA_CONTAINER_IMAGE_DOWNLOAD_DIR}" .
    if ctr -n k8s.io image import --base-name "$base_name" "${POD_INFRA_CONTAINER_IMAGE_TAR}"; then
        # The import creates "<base_name>:local"; re-tag it under the name that
        # containerd/kubelet are configured to look for.
        ctr -n k8s.io image tag "${base_name}:${tag}" "${pod_infra_container_image}"
        echo "Successfully imported $pod_infra_container_image"
        labelContainerImage "${pod_infra_container_image}" "io.cri-containerd.pinned" "pinned"
    else
        echo "Failed to import $pod_infra_container_image"
        rm -rf "${POD_INFRA_CONTAINER_IMAGE_DOWNLOAD_DIR}"
        rm -f "${POD_INFRA_CONTAINER_IMAGE_TAR}"
        exit $ERR_PULL_POD_INFRA_CONTAINER_IMAGE
    fi

    rm -rf "${POD_INFRA_CONTAINER_IMAGE_DOWNLOAD_DIR}"
    rm -f "${POD_INFRA_CONTAINER_IMAGE_TAR}"
}
616+
579617
ensureKubelet() {
580618
KUBELET_DEFAULT_FILE=/etc/default/kubelet
581619
mkdir -p /etc/default
@@ -741,6 +779,11 @@ EOF
741779
fi
742780
fi
743781

782+
# kubelet cannot pull pause image from anonymous disabled registry during runtime
783+
if [ -n "${BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER}" ]; then
784+
logs_to_events "AKS.CSE.ensureKubelet.ensurePodInfraContainerImage" ensurePodInfraContainerImage
785+
fi
786+
744787
# start measure-tls-bootstrapping-latency.service without waiting for the main process to start, while ignoring any failures
745788
if ! systemctlEnableAndStartNoBlock measure-tls-bootstrapping-latency 30; then
746789
echo "failed to start measure-tls-bootstrapping-latency.service"

parts/linux/cloud-init/artifacts/cse_helpers.sh

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ ERR_SECURE_TLS_BOOTSTRAP_START_FAILURE=220 # Error starting the secure TLS boots
139139
ERR_CLOUD_INIT_FAILED=223 # Error indicating that cloud-init returned exit code 1 in cse_cmd.sh
140140
ERR_NVIDIA_DRIVER_INSTALL=224 # Error determining if nvidia driver install should be skipped
141141

142+
ERR_PULL_POD_INFRA_CONTAINER_IMAGE=225 # Error pulling pause image
143+
142144
# For both Ubuntu and Mariner, /etc/*-release should exist.
143145
# For unit tests, the OS and OS_VERSION will be set in the unit test script.
144146
# So whether it's if or else actually doesn't matter to our unit test.
@@ -337,6 +339,28 @@ retrycmd_get_tarball_from_registry_with_oras() {
337339
done
338340
}
339341

342+
# retrycmd_cp_oci_layout_with_oras copies an image from a registry into a local
# OCI layout directory using "oras cp", retrying on failure.
#
# Arguments:
#   $1 retries     number of attempts to make
#   $2 wait_sleep  seconds to sleep between attempts
#   $3 path        destination OCI layout directory (created if missing)
#   $4 tag         tag to give the image inside the layout
#   $5 url         source image reference
# Reads:   ORAS_REGISTRY_CONFIG_FILE, ORAS_OUTPUT, ERR_PULL_POD_INFRA_CONTAINER_IMAGE
# Returns: 0 as soon as one attempt succeeds; ERR_PULL_POD_INFRA_CONTAINER_IMAGE
#          once every attempt has failed.
retrycmd_cp_oci_layout_with_oras() {
    retries=$1; wait_sleep=$2; path=$3; tag=$4; url=$5
    mkdir -p "$path"
    echo "${retries} retries"
    for i in $(seq 1 "$retries"); do
        # Only pause between attempts, never before the first one.
        if [ "$i" -gt 1 ]; then
            sleep "$wait_sleep"
        fi
        # Every iteration makes a real attempt, so "retries" attempts are made
        # in total and a value of 1 still tries once.
        if timeout 120 oras cp "$url" "$path:$tag" --to-oci-layout --from-registry-config "${ORAS_REGISTRY_CONFIG_FILE}" > "$ORAS_OUTPUT" 2>&1; then
            return 0
        fi
        cat "$ORAS_OUTPUT"
    done
    echo "Failed to oras cp $url to $path:$tag after $retries attempts"
    return $ERR_PULL_POD_INFRA_CONTAINER_IMAGE
}
363+
340364
retrycmd_get_aad_access_token() {
341365
retries=$1; wait_sleep=$2; url=$3
342366
for i in $(seq 1 $retries); do
@@ -1104,4 +1128,46 @@ extract_tarball() {
11041128
esac
11051129
}
11061130

1131+
# get_sandbox_image prints the sandbox (pause) image reference.
# The value configured in /etc/containerd/config.toml takes precedence; when
# that lookup yields nothing, the --pod-infra-container-image value found in
# KUBELET_FLAGS is used instead.
function get_sandbox_image(){
    sandbox_image=$(get_sandbox_image_from_containerd_config "/etc/containerd/config.toml")

    # Fall back to the kubelet flag only when containerd had no answer.
    [ -n "$sandbox_image" ] || sandbox_image=$(extract_value_from_kubelet_flags "$KUBELET_FLAGS" "pod-infra-container-image")

    echo $sandbox_image
}
1139+
1140+
# extract_value_from_kubelet_flags prints the value of one "--key=value" entry
# from a space-separated kubelet flag string.
#
# Arguments:
#   $1 kubelet_flags  the full flag string
#   $2 key            flag name, with or without the leading "--"
# Prints the value (everything after "=" up to the next space), or an empty
# line when the flag is not present.
function extract_value_from_kubelet_flags(){
    local kubelet_flags=$1
    local key="${2#--}"
    # Capture the run of non-space characters that follows "--<key>=".
    local sed_expr="s/.*--${key}=\([^ ]*\).*/\1/p"

    value=$(echo "$kubelet_flags" | sed -n "$sed_expr")
    echo "$value"
}
1148+
1149+
# get_sandbox_image_from_containerd_config prints the sandbox_image value found
# in a containerd configuration file.
#
# Arguments:
#   $1 config_file  path to the containerd config (TOML)
# Prints the value of the first uncommented "sandbox_image = <quoted string>"
# line with its surrounding quotes removed, or an empty line when the file
# does not exist or has no such setting.
function get_sandbox_image_from_containerd_config() {
    local config_file=$1
    local sandbox_image=""

    if [ ! -f "$config_file" ]; then
        echo ""
        return
    fi

    # Match only lines that start (after optional indentation) with the key,
    # so commented-out settings are ignored. Both double-quoted and
    # single-quoted values are accepted. With -n and the p flag, lines that do
    # not match produce no output instead of being echoed back verbatim.
    local pattern="^[[:space:]]*sandbox_image[[:space:]]*=[[:space:]]*[\"']([^\"']*)[\"'].*\$"
    sandbox_image=$(sed -n -E "s/${pattern}/\1/p" "$config_file" | head -n 1)

    echo "$sandbox_image"
}
11071173
#HELPERSEOF

0 commit comments

Comments
 (0)