Skip to content

Commit 48cd676

Browse files
committed
Test investigate OOM
1 parent 1367a66 commit 48cd676

2 files changed

Lines changed: 109 additions & 1 deletion

File tree

.github/actions/smoke-test/build.sh

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,54 @@ docker network create -d bridge app-network
################################################
echo "Building Dev Container"
readonly ID_LABEL="test-container=${TEMPLATE_ID}"

# Snapshot of the host's memory situation, printed right before
# `devcontainer up` so a later failure can be compared against it.
printf '%s\n' "=== HOST memory diagnostics before devcontainer up ==="

# RAM and swap usage in human-readable units.
printf '%s\n' "--- Host system memory overview ---"
free -h
printf '\n'

# Selected raw counters from /proc/meminfo.
printf '%s\n' "--- Host detailed memory info ---"
grep -E "MemTotal|MemFree|MemAvailable|Buffers|Cached|SwapTotal|SwapFree" /proc/meminfo
printf '\n'

# 16 lines = the ps header row plus the 15 largest processes by %MEM.
printf '%s\n' "--- Host top 15 processes by memory ---"
ps aux --sort=-%mem | head -n 16
printf '\n'

# Disk space consumed by Docker images, containers, volumes and build cache.
printf '%s\n' "--- Docker system info ---"
docker system df
printf '%s\n' "=== End HOST memory diagnostics ==="
109+
110+
# Run devcontainer up and capture exit code (disable errexit temporarily)
# With errexit off, a non-zero status from `devcontainer up` is stored in
# DEVCONTAINER_EXIT instead of aborting the script (assuming errexit was
# enabled earlier, as the comment above implies), so the post-failure
# diagnostics that follow can still run.
111+
set +e
94112
devcontainer up --id-label "${ID_LABEL}" --workspace-folder "${SRC_DIR}"
113+
# `$?` must be read immediately: any intervening command would overwrite it.
DEVCONTAINER_EXIT=$?
114+
# errexit is switched on unconditionally here, whatever its state was before.
set -e
#######################################
# Print the last 50 lines of the kernel log (where OOM-killer events show up).
# Tries plain `dmesg` first, then non-interactive `sudo dmesg`.
# The log is captured into a variable before it is trimmed: piping `dmesg`
# straight into `tail` yields tail's exit status (unless pipefail is set), so
# an `|| fallback` placed after such a pipeline never runs when dmesg fails.
# Outputs: the log tail, or "dmesg not available" when both attempts fail.
# Returns: 0 in either case.
#######################################
print_kernel_log_tail() {
  local kernel_log
  # `sudo -n` fails instead of prompting, so an unattended run cannot hang.
  if kernel_log=$(dmesg 2>/dev/null) || kernel_log=$(sudo -n dmesg 2>/dev/null); then
    printf '%s\n' "${kernel_log}" | tail -n 50
  else
    echo "dmesg not available"
  fi
}

# Post-failure diagnostics
# Only runs when `devcontainer up` failed; dumps container and host state and
# then exits with the original failure code.
if [[ $DEVCONTAINER_EXIT -ne 0 ]]; then
  echo "=== devcontainer up failed with exit code $DEVCONTAINER_EXIT ==="

  # First container (running or stopped) that carries this run's label, if any.
  CONTAINER_ID=$(docker container ls -a -f "label=${ID_LABEL}" -q | head -1)

  if [[ -n "$CONTAINER_ID" ]]; then
    echo "--- Container state ---"
    # The HostConfig fields are selected from `.HostConfig` explicitly. Writing
    # `HostConfig: {Memory, ...}` would evaluate the shorthand against the
    # top-level container object, where those keys do not exist (all null).
    docker inspect "$CONTAINER_ID" 2>&1 \
      | jq '.[0] | {State, HostConfig: (.HostConfig | {Memory, MemorySwap, MemoryReservation, PidsLimit, OomKillDisable})}' \
      || echo "inspect failed"
    echo ""
    echo "--- Container OOMKilled status ---"
    docker inspect "$CONTAINER_ID" --format='{{.State.OOMKilled}}' || echo "unknown"
    echo ""
    echo "--- Container logs (last 100 lines) ---"
    docker logs "$CONTAINER_ID" 2>&1 | tail -100 || echo "no logs"
    echo ""
    echo "--- Full container inspect (Config.Cmd, Entrypoint) ---"
    docker inspect "$CONTAINER_ID" 2>&1 \
      | jq '.[0] | {Cmd: .Config.Cmd, Entrypoint: .Config.Entrypoint, Image: .Config.Image}' \
      || echo "inspect failed"
    echo ""
  fi

  echo "--- Host dmesg (last 50 lines, looking for OOM/kill) ---"
  print_kernel_log_tail
  echo ""
  echo "--- Host memory after failure ---"
  free -h

  # Propagate the original failure so the calling workflow step still fails.
  exit "$DEVCONTAINER_EXIT"
fi

startupscript/post-startup.sh

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,70 @@ ${RUN_AS_LOGIN_USER} "ln -sf '${USER_WORKBENCH_CONFIG_DIR}' '${USER_WORKBENCH_LE
7979
# From this point on, the script's stdout and stderr are each piped through
# `tee -a`, which appends a copy to ${POST_STARTUP_OUTPUT_FILE}. stdout's copy
# continues to the previous stdout; stderr's copy is sent back to stderr (>&2).
exec > >(tee -a "${POST_STARTUP_OUTPUT_FILE}") # Append output to the file and print to terminal
8080
exec 2> >(tee -a "${POST_STARTUP_OUTPUT_FILE}" >&2) # Append errors to the file and print to terminal
8181

# Memory diagnostics before package installation
printf '%s\n' "=== Memory diagnostics before apt-get update ==="

printf '%s\n' "--- System memory overview (NOTE: this shows HOST memory, not container limits) ---"
free -h
printf '\n'

printf '%s\n' "--- Detailed memory info ---"
grep -E "MemTotal|MemFree|MemAvailable|Buffers|Cached|SwapTotal|SwapFree" /proc/meminfo
printf '\n'

# Each row below is "<heading>|<cgroup file>". Both the v2 and the v1 paths
# are probed; a file that cannot be read is reported as "not available".
printf '%s\n' "--- Cgroup memory limits (actual container limits) ---"
while IFS='|' read -r cgroup_heading cgroup_file; do
  printf '%s\n' "${cgroup_heading}"
  cat "${cgroup_file}" 2>/dev/null || printf '%s\n' "not available"
done <<'CGROUP_FILES'
Cgroup v2 memory.max:|/sys/fs/cgroup/memory.max
Cgroup v2 memory.current:|/sys/fs/cgroup/memory.current
Cgroup v2 memory.swap.max:|/sys/fs/cgroup/memory.swap.max
Cgroup v1 memory.limit_in_bytes:|/sys/fs/cgroup/memory/memory.limit_in_bytes
Cgroup v1 memory.usage_in_bytes:|/sys/fs/cgroup/memory/memory.usage_in_bytes
CGROUP_FILES
printf '\n'

# 21 lines = the ps header row plus the 20 largest processes by %MEM.
printf '%s\n' "--- Top 20 processes by memory (RSS) ---"
ps aux --sort=-%mem | head -n 21
printf '\n'

# Compact pid/ppid listing sorted by %MEM, limited to 30 lines.
printf '%s\n' "--- Process tree with memory ---"
ps axo pid,ppid,comm,%mem,rss --sort=-%mem | head -n 30
printf '%s\n' "=== End memory diagnostics ==="

# Check disk space before package installation
# Falls back to a full `df -h` listing if any of the three paths cannot be queried.
printf '%s\n' "=== Disk space before apt-get ==="
df -h /var /tmp / 2>/dev/null || df -h
printf '\n'
82114
# The apt package index may not be clean when we run; resynchronize
83115
if type apk > /dev/null 2>&1; then
84116
apk update
85117
apk add --no-cache jq curl fuse tar wget
86118
elif type apt-get > /dev/null 2>&1; then
87-
apt-get update
# Start background memory monitor
# Logs one line per second with the cgroup v2 usage counter and the kernel's
# MemAvailable figure, so the output shows how memory evolved while apt ran.
echo "=== Starting memory monitor (logging every second) ==="
(
  while true; do
    echo "[$(date '+%H:%M:%S')] cgroup_mem=$(cat /sys/fs/cgroup/memory.current 2>/dev/null || echo N/A) free_mem=$(awk '/MemAvailable/ {print $2}' /proc/meminfo)kB"
    sleep 1
  done
) &
MONITOR_PID=$!

# Run apt-get update with error handling
# The status is captured with `|| APT_EXIT=$?` instead of inside
# `if ! apt-get update; then`: `!` inverts the status, so `$?` read in that
# branch is always 0 and the script would exit 0 on a failed update.
echo "=== Running apt-get update ==="
APT_EXIT=0
apt-get update || APT_EXIT=$?

# Stop the monitor on both paths before printing anything else, so its lines
# do not interleave with the diagnostics; `wait` reaps the killed subshell.
kill "${MONITOR_PID}" 2>/dev/null || true
wait "${MONITOR_PID}" 2>/dev/null || true

if [[ "${APT_EXIT}" -ne 0 ]]; then
  echo "=== apt-get update failed with exit code ${APT_EXIT} ==="
  echo "=== Checking dmesg for OOM messages ==="
  # Capture first: `dmesg | tail || echo ...` reports tail's status (unless
  # pipefail is set), so the fallback would never fire when dmesg is denied.
  if KERNEL_LOG=$(dmesg 2>/dev/null); then
    printf '%s\n' "${KERNEL_LOG}" | tail -n 50
  else
    echo "dmesg not available"
  fi
  echo "=== Memory state after failure ==="
  # Best effort: a failing diagnostic must not replace apt's exit code.
  free -h || true
  cat /sys/fs/cgroup/memory.current 2>/dev/null || echo "cgroup memory not available"
  exit "${APT_EXIT}"
fi

echo "=== apt-get update completed successfully ==="
88146
apt install -y jq curl fuse tar wget
89147
else
90148
>&2 echo "ERROR: Unable to find a supported package manager"

0 commit comments

Comments
 (0)