Skip to content

Commit 50f4539

Browse files
authored
Don't break docker networking (#104)
* Don't break docker networking * Add test * address review comments **Fixed (2):** - **"Hypeman rule scan may miss comments"** — Added `-v` flag to the `iptables -L` call in `lastHypemanForwardRulePosition`, matching what `isForwardRuleCorrect` already does. Without `-v`, some iptables versions don't show comment text. - **"Test can leave host iptables modified"** — Added a `t.Cleanup` that checks if the DOCKER-FORWARD jump is missing and restores it, so a mid-test failure won't leave the host broken. **Dismissed with inline comments (2):** - **"Docker jump may bypass DOCKER-USER rules"** — Added a comment on `ensureDockerForwardJump` explaining that it only inserts when the jump is completely absent (the `-C` check returns early otherwise), so it can't reorder existing Docker rules. - **"Docker iptables test lacks privilege guard"** — Added a comment on the test explaining that `make test-linux` runs the entire suite under `sudo`, so iptables permissions are always available.
1 parent f8f791b commit 50f4539

2 files changed

Lines changed: 120 additions & 0 deletions

File tree

lib/instances/network_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"context"
66
"os"
7+
"os/exec"
78
"strings"
89
"testing"
910
"time"
@@ -59,6 +60,48 @@ func TestCreateInstanceWithNetwork(t *testing.T) {
5960
require.NoError(t, err)
6061
t.Log("Network initialized")
6162

63+
// Verify that ensureDockerForwardJump restores Docker's FORWARD chain
64+
// when it gets wiped (e.g., by a hypervisor firewall rebuild).
65+
// Note: no extra privilege guard needed — make test-linux runs the entire
66+
// suite under sudo, so iptables commands have the required permissions.
67+
t.Run("DockerForwardChainRestored", func(t *testing.T) {
68+
// Check if DOCKER-FORWARD chain exists (Docker must be running on host)
69+
checkChain := exec.Command("iptables", "-L", "DOCKER-FORWARD", "-n")
70+
if checkChain.Run() != nil {
71+
t.Skip("DOCKER-FORWARD chain not present (Docker not running), skipping")
72+
}
73+
74+
// Verify jump currently exists
75+
checkJump := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD")
76+
require.NoError(t, checkJump.Run(), "DOCKER-FORWARD jump should exist before test")
77+
78+
// Safety net: restore the jump if the test fails or aborts after we delete it,
79+
// so we don't leave the host's Docker networking broken.
80+
t.Cleanup(func() {
81+
check := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD")
82+
if check.Run() != nil {
83+
restore := exec.Command("iptables", "-A", "FORWARD", "-j", "DOCKER-FORWARD")
84+
_ = restore.Run()
85+
}
86+
})
87+
88+
// Simulate the hypervisor flush: delete the jump
89+
delJump := exec.Command("iptables", "-D", "FORWARD", "-j", "DOCKER-FORWARD")
90+
require.NoError(t, delJump.Run(), "should be able to delete DOCKER-FORWARD jump")
91+
92+
// Confirm it's gone
93+
checkGone := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD")
94+
require.Error(t, checkGone.Run(), "DOCKER-FORWARD jump should be gone after delete")
95+
96+
// Re-initialize network — this should restore the jump
97+
err := manager.networkManager.Initialize(ctx, nil)
98+
require.NoError(t, err)
99+
100+
// Verify jump is restored
101+
checkRestored := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD")
102+
require.NoError(t, checkRestored.Run(), "ensureDockerForwardJump should have restored the DOCKER-FORWARD jump")
103+
})
104+
62105
// Create instance with nginx:alpine and default network
63106
t.Log("Creating instance with default network...")
64107
inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{

lib/network/bridge_linux.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,13 @@ func (m *manager) setupIPTablesRules(ctx context.Context, subnet, bridgeName str
251251

252252
log.InfoContext(ctx, "iptables FORWARD ready", "outbound", fwdOutStatus, "inbound", fwdInStatus)
253253

254+
// Restore Docker's FORWARD chain jumps if they were lost.
255+
// On systems where an external tool (e.g., hypervisor firewall management) periodically
256+
// rebuilds the FORWARD chain, Docker's jump rules can be wiped out. Docker only inserts
257+
// them at daemon start, so they stay missing until Docker is restarted. Since hypeman
258+
// already re-ensures its own rules here, we also restore Docker's if needed.
259+
m.ensureDockerForwardJump(ctx)
260+
254261
return nil
255262
}
256263

@@ -409,6 +416,76 @@ func (m *manager) deleteForwardRuleByComment(comment string) {
409416
}
410417
}
411418

419+
// ensureDockerForwardJump checks if Docker's DOCKER-FORWARD chain exists but is
420+
// unreachable from the FORWARD chain, and restores the jump if missing.
421+
// This is a no-op if Docker is not installed or the jump already exists.
422+
//
423+
// Note: this cannot mis-order DOCKER-FORWARD vs DOCKER-USER because it only acts
424+
// when the jump is completely absent (chain was flushed). If DOCKER-USER's jump
425+
// still exists, DOCKER-FORWARD's jump is almost certainly still there too — they
426+
// get wiped together — and the early -C check returns before we insert anything.
427+
func (m *manager) ensureDockerForwardJump(ctx context.Context) {
428+
log := logger.FromContext(ctx)
429+
430+
// Check if DOCKER-FORWARD chain exists (Docker is installed and configured)
431+
checkChain := exec.Command("iptables", "-L", "DOCKER-FORWARD", "-n")
432+
checkChain.SysProcAttr = &syscall.SysProcAttr{
433+
AmbientCaps: []uintptr{unix.CAP_NET_ADMIN},
434+
}
435+
if checkChain.Run() != nil {
436+
return // Chain doesn't exist — Docker not installed or not configured
437+
}
438+
439+
// Check if jump already exists in FORWARD
440+
checkJump := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD")
441+
checkJump.SysProcAttr = &syscall.SysProcAttr{
442+
AmbientCaps: []uintptr{unix.CAP_NET_ADMIN},
443+
}
444+
if checkJump.Run() == nil {
445+
return // Jump already present
446+
}
447+
448+
// DOCKER-FORWARD chain exists but the jump from FORWARD is missing — restore it.
449+
// Insert right after hypeman's last rule so the jump is evaluated before any
450+
// explicit DROP/REJECT rules that an external firewall tool may have added.
451+
insertPos := m.lastHypemanForwardRulePosition() + 1
452+
addJump := exec.Command("iptables", "-I", "FORWARD", fmt.Sprintf("%d", insertPos), "-j", "DOCKER-FORWARD")
453+
addJump.SysProcAttr = &syscall.SysProcAttr{
454+
AmbientCaps: []uintptr{unix.CAP_NET_ADMIN},
455+
}
456+
if err := addJump.Run(); err != nil {
457+
log.WarnContext(ctx, "failed to restore Docker FORWARD chain jump", "error", err)
458+
return
459+
}
460+
461+
log.WarnContext(ctx, "restored missing jump to DOCKER-FORWARD in FORWARD chain", "position", insertPos)
462+
}
463+
464+
// lastHypemanForwardRulePosition returns the line number of the last hypeman-managed
465+
// rule in the FORWARD chain, or 0 if none are found.
466+
func (m *manager) lastHypemanForwardRulePosition() int {
467+
cmd := exec.Command("iptables", "-L", "FORWARD", "--line-numbers", "-n", "-v")
468+
cmd.SysProcAttr = &syscall.SysProcAttr{
469+
AmbientCaps: []uintptr{unix.CAP_NET_ADMIN},
470+
}
471+
output, err := cmd.Output()
472+
if err != nil {
473+
return 0
474+
}
475+
476+
lastPos := 0
477+
for _, line := range strings.Split(string(output), "\n") {
478+
if !strings.Contains(line, "hypeman-") {
479+
continue
480+
}
481+
var pos int
482+
if _, err := fmt.Sscanf(line, "%d", &pos); err == nil && pos > lastPos {
483+
lastPos = pos
484+
}
485+
}
486+
return lastPos
487+
}
488+
412489
// createTAPDevice creates TAP device and attaches to bridge.
413490
// downloadBps: rate limit for download (external→VM), applied as TBF on TAP egress
414491
// uploadBps/uploadCeilBps: rate limit for upload (VM→external), applied as HTB class on bridge

0 commit comments

Comments
 (0)