Skip to content

Commit 1a7220e

Browse files
committed
more fixes on iptables rules and network services, host selection
1 parent 3c95b59 commit 1a7220e

File tree

2 files changed

+106
-55
lines changed

2 files changed

+106
-55
lines changed

extensions/network-namespace/network-namespace-wrapper.sh

Lines changed: 44 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -929,10 +929,14 @@ cmd_assign_ip() {
929929
# After a network restart the veth is recreated with a new MAC address.
930930
# Without a gratuitous ARP the upstream gateway retains the stale ARP entry
931931
# for the old MAC and packets cannot reach the new veth.
932-
if command -v arping >/dev/null 2>&1; then
933-
ip netns exec "${NAMESPACE}" arping -c 3 -U -I "${pveth_n}" "${PUBLIC_IP}" \
932+
# Use _find_arping to locate the binary in PATH and common sbin locations.
933+
local _arping_bin; _arping_bin=$(_find_arping) || true
934+
if [ -n "${_arping_bin}" ]; then
935+
ip netns exec "${NAMESPACE}" "${_arping_bin}" -c 3 -U -I "${pveth_n}" "${PUBLIC_IP}" \
934936
>/dev/null 2>&1 || true
935937
log "assign-ip: sent gratuitous ARP for ${PUBLIC_IP} on ${pveth_n}"
938+
else
939+
log "assign-ip: arping not available — skipping gratuitous ARP for ${PUBLIC_IP}"
936940
fi
937941

938942
# ---- Default route inside namespace toward upstream gateway ----
@@ -1324,6 +1328,19 @@ _apache2_user() {
13241328
echo "nobody"
13251329
}
13261330

1331+
# Locate the arping binary.
#
# Lookup order:
#   1. PATH, resolved through `command -v` to an absolute path.
#   2. The fixed locations /usr/bin, /usr/sbin and /sbin, which are often
#      missing from the PATH of non-login / service shells.
#
# Outputs: the absolute path of the binary on stdout.
# Returns: 0 when a binary was found, 1 otherwise (nothing is printed).
_find_arping() {
    local candidate resolved

    # `command -v` also reports shell functions and aliases by bare name;
    # those cannot be handed to `ip netns exec`, so only an absolute path is
    # accepted here. The `|| resolved=""` keeps a miss from tripping `set -e`.
    resolved=$(command -v arping 2>/dev/null) || resolved=""
    case "${resolved}" in
        /*)
            printf '%s\n' "${resolved}"
            return 0
            ;;
    esac

    # Require a regular file as well as the execute bit: `-x` alone is also
    # true for directories. The bare name "arping" is deliberately not tested
    # with `-x`, because that would be resolved against the current working
    # directory rather than PATH.
    for candidate in /usr/bin/arping /usr/sbin/arping /sbin/arping; do
        if [ -f "${candidate}" ] && [ -x "${candidate}" ]; then
            printf '%s\n' "${candidate}"
            return 0
        fi
    done

    return 1
}
1343+
13271344
##############################################################################
13281345
# Helpers: dnsmasq (DHCP + DNS via the same process)
13291346
##############################################################################
@@ -1520,8 +1537,6 @@ _svc_stop_haproxy() {
15201537
# Helpers: apache2 (userdata / metadata HTTP service)
15211538
#
15221539
# apache2 runs inside the namespace, listening on <EXTENSION_IP>:80.
1523-
# An iptables DNAT rule inside the namespace redirects requests destined for
1524-
# 169.254.169.254:80 to <EXTENSION_IP>:80 so VMs can use the standard metadata URL.
15251540
# EXTENSION_IP equals the network gateway when SourceNat/Gateway is enabled,
15261541
# or a dedicated placeholder IP otherwise. Falls back to GATEWAY when absent.
15271542
#
@@ -1648,21 +1663,13 @@ _svc_start_or_reload_apache2() {
16481663
fi
16491664
fi
16501665

1651-
# DNAT 169.254.169.254:80 → EXTENSION_IP:80 (idempotent)
1652-
# Use EXTENSION_IP as the metadata server address; fall back to GATEWAY.
1653-
local meta_ip; meta_ip="${EXTENSION_IP:-${GATEWAY}}"
1654-
ip netns exec "${NAMESPACE}" iptables -t nat \
1655-
-C PREROUTING -d 169.254.169.254/32 -p tcp --dport 80 \
1656-
-j DNAT --to-destination "${meta_ip}:80" 2>/dev/null || \
1657-
ip netns exec "${NAMESPACE}" iptables -t nat \
1658-
-A PREROUTING -d 169.254.169.254/32 -p tcp --dport 80 \
1659-
-j DNAT --to-destination "${meta_ip}:80"
1660-
1661-
# Allow metadata traffic inbound to the namespace (INPUT)
1662-
ip netns exec "${NAMESPACE}" iptables -t filter \
1663-
-C INPUT -p tcp --dport 80 -j ACCEPT 2>/dev/null || \
1664-
ip netns exec "${NAMESPACE}" iptables -t filter \
1665-
-A INPUT -p tcp --dport 80 -j ACCEPT
1666+
# Allow metadata traffic inbound to the namespace (INPUT) from guest subnet only.
1667+
if [ -n "${CIDR}" ]; then
1668+
ip netns exec "${NAMESPACE}" iptables -t filter \
1669+
-C INPUT -p tcp -s "${CIDR}" --dport 80 -j ACCEPT 2>/dev/null || \
1670+
ip netns exec "${NAMESPACE}" iptables -t filter \
1671+
-A INPUT -p tcp -s "${CIDR}" --dport 80 -j ACCEPT
1672+
fi
16661673
}
16671674

16681675
_svc_stop_apache2() {
@@ -1782,26 +1789,28 @@ server.serve_forever()
17821789
PYEOF
17831790
chmod +x "${script_f}"
17841791

1785-
# Skip restart if already running
1792+
# Only start if not already running; the iptables rule is (re-)applied regardless
1793+
# so that it is always present in the current namespace after a cleanup restart.
17861794
if [ -f "${pid_f}" ] && kill -0 "$(cat "${pid_f}")" 2>/dev/null; then
17871795
log "passwd-server: already running (pid=$(cat "${pid_f}"))"
1788-
return 0
1796+
else
1797+
# Use EXTENSION_IP as the listen address; fall back to GATEWAY when absent.
1798+
local listen_ip; listen_ip="${EXTENSION_IP:-${GATEWAY}}"
1799+
log "passwd-server: starting in namespace ${NAMESPACE} on ${listen_ip}:8080"
1800+
ip netns exec "${NAMESPACE}" python3 "${script_f}" \
1801+
"${listen_ip}" "${passwd_f}" "${pid_f}" \
1802+
>> "${log_f}" 2>&1 &
1803+
# Brief pause to let the server write its PID
1804+
sleep 0.3
17891805
fi
17901806

1791-
# Use EXTENSION_IP as the listen address; fall back to GATEWAY when absent.
1792-
local listen_ip; listen_ip="${EXTENSION_IP:-${GATEWAY}}"
1793-
log "passwd-server: starting in namespace ${NAMESPACE} on ${listen_ip}:8080"
1794-
ip netns exec "${NAMESPACE}" python3 "${script_f}" \
1795-
"${listen_ip}" "${passwd_f}" "${pid_f}" \
1796-
>> "${log_f}" 2>&1 &
1797-
# Brief pause to let the server write its PID
1798-
sleep 0.3
1799-
1800-
# Allow inbound connections to port 8080 inside the namespace
1801-
ip netns exec "${NAMESPACE}" iptables -t filter \
1802-
-C INPUT -p tcp --dport 8080 -j ACCEPT 2>/dev/null || \
1803-
ip netns exec "${NAMESPACE}" iptables -t filter \
1804-
-A INPUT -p tcp --dport 8080 -j ACCEPT
1807+
# Always ensure the iptables INPUT rule is present (idempotent).
1808+
if [ -n "${CIDR}" ]; then
1809+
ip netns exec "${NAMESPACE}" iptables -t filter \
1810+
-C INPUT -p tcp -s "${CIDR}" --dport 8080 -j ACCEPT 2>/dev/null || \
1811+
ip netns exec "${NAMESPACE}" iptables -t filter \
1812+
-A INPUT -p tcp -s "${CIDR}" --dport 8080 -j ACCEPT
1813+
fi
18051814
}
18061815

18071816
_svc_stop_passwd_server() {

extensions/network-namespace/network-namespace.sh

Lines changed: 62 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,26 @@ set -euo pipefail
7777

7878
DEFAULT_SSH_PORT=22
7979
DEFAULT_SSH_USER=root
80-
DEFAULT_SCRIPT_PATH=/etc/cloudstack/extensions/network-namespace/network-namespace-wrapper.sh
81-
LOG_FILE=/var/log/cloudstack/management/network-namespace.log
80+
81+
# ---------------------------------------------------------------------------
82+
# The KVM wrapper script is deployed to the **same path** as this entry-point
83+
# on the management server. Resolve $0 to an absolute path so that the
84+
# remote SSH call uses the correct location regardless of how this script was
85+
# invoked (relative path, symlink, etc.).
86+
#
87+
# Callers may still override the remote path via CS_NET_SCRIPT_PATH:
88+
# CS_NET_SCRIPT_PATH=/custom/path/wrapper.sh network-namespace.sh <cmd> ...
89+
# ---------------------------------------------------------------------------
90+
_SELF="$(readlink -f "$0" 2>/dev/null \
91+
|| realpath "$0" 2>/dev/null \
92+
|| echo "$0")"
93+
DEFAULT_SCRIPT_PATH="${_SELF}"
94+
95+
# Derive the log file name from this script's own basename so that a renamed
96+
# deployment (e.g. "my-extension.sh") writes to its own log file instead of
97+
# a hardcoded "network-namespace.log".
98+
_SCRIPT_BASENAME="$(basename "${_SELF}" .sh)"
99+
LOG_FILE="/var/log/cloudstack/management/${_SCRIPT_BASENAME}.log"
82100
TMPDIR_BASE=/tmp
83101

84102
# ---------------------------------------------------------------------------
@@ -264,15 +282,18 @@ if [ "${COMMAND}" = "ensure-network-device" ]; then
264282
die "ensure-network-device: no hosts configured. Set 'hosts' in registerExtension details." 1
265283
fi
266284

267-
# Namespace: VPC networks share one namespace per VPC (cs-net-<vpcId>);
285+
# Namespace names must match those used by the wrapper on the KVM host.
286+
# VPC networks share one namespace per VPC (cs-vpc-<vpcId>);
268287
# standalone isolated networks get their own namespace (cs-net-<networkId>).
269288
if [ -n "${VPC_ID}" ]; then
270-
NAMESPACE="cs-net-${VPC_ID}"
289+
NAMESPACE="cs-vpc-${VPC_ID}"
271290
else
272291
NAMESPACE="cs-net-${NETWORK_ID}"
273292
fi
274293

275-
# Try the previously selected host first (from --current-details or --network-extension-details)
294+
# ---- Step 1: honour the previously selected host (sticky assignment) ----
295+
# This preserves the host–namespace binding across API calls once a network
296+
# has been implemented on a particular KVM host.
276297
CURRENT_HOST=$(json_get "${CURRENT_DETAILS}" "host")
277298
[ -z "${CURRENT_HOST}" ] && CURRENT_HOST=$(json_get "${EXTENSION_DETAILS}" "host")
278299

@@ -298,24 +319,45 @@ if [ "${COMMAND}" = "ensure-network-device" ]; then
298319
done
299320
fi
300321

301-
# Select a new reachable host from the list
302-
for h in "${HOST_LIST[@]}"; do
303-
h="${h// /}"
304-
if host_reachable "${h}"; then
305-
log "ensure-network-device: network=${NETWORK_ID} selected host=${h}"
306-
if [ -n "${VPC_ID}" ]; then
307-
printf '{"host":"%s","namespace":"%s","vpc_id":"%s"}\n' \
308-
"${h}" "${NAMESPACE}" "${VPC_ID}"
309-
else
310-
printf '{"host":"%s","namespace":"%s"}\n' "${h}" "${NAMESPACE}"
311-
fi
312-
exit 0
313-
else
314-
log "ensure-network-device: host ${h} not reachable, trying next"
322+
# ---- Step 2: stable hash-based host selection for new / failed-over networks ----
323+
#
324+
# For VPC networks ALL tiers must land on the same KVM host (they share one
325+
# namespace). Using VPC_ID as the hash key guarantees every tier in a VPC
326+
# hashes to the same preferred index even when its own details are not yet
327+
# stored. For isolated networks the NETWORK_ID is used.
328+
#
329+
# Algorithm: POSIX CRC checksum of the routing key (via cksum; not zlib CRC-32) modulo the host count
330+
# gives a stable preferred index. We probe hosts starting from that index,
331+
# wrapping around, until a reachable one is found. This distributes
332+
# different networks evenly across KVM hosts while remaining deterministic.
333+
_ROUTE_KEY="${VPC_ID:-${NETWORK_ID}}"
334+
_HOST_COUNT="${#HOST_LIST[@]}"
335+
_PREFERRED_IDX=$(printf '%s' "${_ROUTE_KEY}" | cksum | awk -v n="${_HOST_COUNT}" '{print ($1 % n)}')
336+
337+
_SELECTED_HOST=""
338+
_PROBE=0
339+
while [ "${_PROBE}" -lt "${_HOST_COUNT}" ]; do
340+
_IDX=$(( (_PREFERRED_IDX + _PROBE) % _HOST_COUNT ))
341+
_H="${HOST_LIST[$_IDX]// /}"
342+
if host_reachable "${_H}"; then
343+
_SELECTED_HOST="${_H}"
344+
log "ensure-network-device: network=${NETWORK_ID} hash-selected host=${_SELECTED_HOST} (key=${_ROUTE_KEY}, idx=${_IDX})"
345+
break
315346
fi
347+
log "ensure-network-device: host ${_H} not reachable, trying next"
348+
_PROBE=$(( _PROBE + 1 ))
316349
done
317350

318-
die "ensure-network-device: no reachable host found in list: ${HOSTS_CSV:-${SINGLE_HOST}}" 1
351+
[ -z "${_SELECTED_HOST}" ] && \
352+
die "ensure-network-device: no reachable host found in list: ${HOSTS_CSV:-${SINGLE_HOST}}" 1
353+
354+
if [ -n "${VPC_ID}" ]; then
355+
printf '{"host":"%s","namespace":"%s","vpc_id":"%s"}\n' \
356+
"${_SELECTED_HOST}" "${NAMESPACE}" "${VPC_ID}"
357+
else
358+
printf '{"host":"%s","namespace":"%s"}\n' "${_SELECTED_HOST}" "${NAMESPACE}"
359+
fi
360+
exit 0
319361
fi
320362

321363
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)