Skip to content

Commit d684d75

Browse files
committed
fix ufw issue
1 parent ec6ab83 commit d684d75

2 files changed

Lines changed: 93 additions & 24 deletions

File tree

v1/providers/nebius/instance.go

Lines changed: 73 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,7 +1582,6 @@ func generateCloudInitUserData(publicKey string, firewallRules v1.FirewallRules)
15821582
script := `#cloud-config
15831583
packages:
15841584
- ufw
1585-
- iptables-persistent
15861585
`
15871586

15881587
// Add SSH key configuration if provided
@@ -1594,33 +1593,19 @@ packages:
15941593

15951594
var commands []string
15961595

1597-
// Fix a systemd race condition: ufw.service and netfilter-persistent.service
1598-
// both start in parallel (both are Before=network-pre.target with no mutual
1599-
// ordering). Both call iptables-restore concurrently, and with the iptables-nft
1600-
// backend the competing nftables transactions cause UFW to fail with
1601-
// "iptables-restore: line 4 failed". This drop-in forces UFW to wait for
1602-
// netfilter-persistent to finish first.
1603-
commands = append(commands,
1604-
"sudo mkdir -p /etc/systemd/system/ufw.service.d",
1605-
`printf '[Unit]\nAfter=netfilter-persistent.service\n' | sudo tee /etc/systemd/system/ufw.service.d/after-netfilter.conf > /dev/null`,
1606-
"sudo systemctl daemon-reload",
1607-
)
1596+
// Install an idempotent Docker firewall hook before enabling UFW. Some
1597+
// Nebius images start Docker after cloud-init runcmd; Docker creates or
1598+
// resets DOCKER-USER during startup, so the rules need to be re-applied after
1599+
// docker.service starts instead of only once during runcmd.
1600+
commands = append(commands, generateDockerFirewallInstallCommands()...)
16081601

16091602
// Generate UFW firewall commands (similar to Shadeform's approach)
16101603
// UFW (Uncomplicated Firewall) is available on Ubuntu/Debian instances
16111604
commands = append(commands, generateUFWCommands(firewallRules)...)
16121605

1613-
// Generate IPTables firewall commands to ensure docker ports are not made immediately
1614-
// accessible from the internet by default.
1615-
commands = append(commands, generateIPTablesCommands()...)
1616-
1617-
// Save the complete iptables state (UFW chains + DOCKER-USER rules) so it
1618-
// survives instance stop/start cycles. Cloud-init runcmd only executes on
1619-
// first boot; on subsequent boots netfilter-persistent restores this snapshot,
1620-
// then UFW starts after it (due to the drop-in above) and re-applies its rules.
1621-
// This provides defense-in-depth: even if UFW fails for any reason, the
1622-
// netfilter-persistent snapshot ensures port 22 and DOCKER-USER rules persist.
1623-
commands = append(commands, "sudo netfilter-persistent save")
1606+
// Apply immediately for images where Docker is already running. The
1607+
// docker.service ExecStartPost hook handles images where Docker starts later.
1608+
commands = append(commands, "sudo /usr/local/sbin/brev-apply-docker-firewall.sh || true")
16241609

16251610
if len(commands) > 0 {
16261611
// Use runcmd to execute firewall setup commands
@@ -1662,11 +1647,75 @@ func generateUFWCommands(firewallRules v1.FirewallRules) []string {
16621647
return commands
16631648
}
16641649

1650+
// Filesystem locations used by the Docker firewall hook. Keep these paths
// stable: they are useful operator touchpoints when debugging instance
// firewall state with systemctl/cat/iptables.
const (
	// dockerFirewallScriptPath is where the generated firewall script is written.
	dockerFirewallScriptPath = "/usr/local/sbin/brev-apply-docker-firewall.sh"

	// dockerServiceDropInDir is the systemd drop-in directory for docker.service.
	dockerServiceDropInDir = "/etc/systemd/system/docker.service.d"

	// dockerFirewallDropInPath is the drop-in file, inside dockerServiceDropInDir,
	// that carries the ExecStartPost entry for the firewall script.
	dockerFirewallDropInPath = dockerServiceDropInDir + "/10-brev-firewall.conf"
)
1657+
1658+
func generateDockerFirewallInstallCommands() []string {
1659+
// Docker published ports are not governed by UFW's INPUT policy. Docker adds
1660+
// NAT/FORWARD rules that can make `docker run -p host:container` reachable
1661+
// from the public internet even when UFW says incoming traffic is denied.
1662+
//
1663+
// DOCKER-USER is Docker's documented filter hook for this traffic. The
1664+
// ordering is important: some Nebius images run cloud-init before Docker has
1665+
// created DOCKER-USER, and Docker may create/reset the chain during daemon
1666+
// startup. We therefore install both:
1667+
// - an immediate cloud-init run for images where Docker is already active
1668+
// - a docker.service ExecStartPost hook for images where Docker starts later
1669+
//
1670+
// The generated script exits successfully even if an iptables command fails
1671+
// because failing Docker startup would be worse operationally. Validation
1672+
// tests assert that the rule set is actually present and blocks published
1673+
// ports.
1674+
scriptLines := append([]string{
1675+
"#!/bin/sh",
1676+
"set +e",
1677+
}, generateIPTablesCommands()...)
1678+
scriptLines = append(scriptLines, "exit 0")
1679+
1680+
return []string{
1681+
generatePrintfToFileCommand(scriptLines, dockerFirewallScriptPath),
1682+
"sudo chmod 0755 " + dockerFirewallScriptPath,
1683+
"sudo mkdir -p " + dockerServiceDropInDir,
1684+
generatePrintfToFileCommand([]string{
1685+
"[Service]",
1686+
"ExecStartPost=" + dockerFirewallScriptPath,
1687+
}, dockerFirewallDropInPath),
1688+
"sudo systemctl daemon-reload",
1689+
}
1690+
}
1691+
1692+
func generatePrintfToFileCommand(lines []string, path string) string {
1693+
quotedLines := make([]string, 0, len(lines))
1694+
for _, line := range lines {
1695+
quotedLines = append(quotedLines, shellSingleQuote(line))
1696+
}
1697+
1698+
return fmt.Sprintf("printf '%%s\\n' %s | sudo tee %s > /dev/null", strings.Join(quotedLines, " "), path)
1699+
}
1700+
1701+
// shellSingleQuote wraps value in single quotes for use as one shell word.
// Every embedded single quote is replaced by the sequence '\'' (close the
// quoted string, emit an escaped quote, reopen the quoted string).
func shellSingleQuote(value string) string {
	const escapedQuote = `'\''`

	// Splitting on the quote and re-joining with the escape sequence replaces
	// every occurrence, including leading, trailing and adjacent quotes.
	segments := strings.Split(value, "'")
	return "'" + strings.Join(segments, escapedQuote) + "'"
}
1704+
16651705
// generateIPTablesCommands generates IPTables firewall commands to ensure docker ports are not made immediately
16661706
// accessible from the internet by default.
16671707
func generateIPTablesCommands() []string {
16681708
commands := []string{
1669-
"iptables -F DOCKER-USER",
1709+
// CPU images can run cloud-init before Docker has created DOCKER-USER.
1710+
// Create it first so the immediate cloud-init run succeeds; when Docker
1711+
// starts later, the systemd ExecStartPost hook above re-applies the same
1712+
// rules after Docker has finished creating/resetting its own chains.
1713+
"iptables -N DOCKER-USER 2>/dev/null || true",
1714+
"iptables -F DOCKER-USER || true",
1715+
1716+
// Preserve already-established connections, then allow container bridge
1717+
// egress and intra-bridge traffic. The final DROP below is what prevents
1718+
// externally-published Docker ports from being reachable by default.
16701719
"iptables -A DOCKER-USER -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT",
16711720
"iptables -A DOCKER-USER -i docker0 ! -o docker0 -j ACCEPT",
16721721
"iptables -A DOCKER-USER -i br+ ! -o br+ -j ACCEPT",

v1/providers/nebius/instance_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,26 @@ func TestNebiusClient_MergeInstanceForUpdate(t *testing.T) {
8484
assert.Equal(t, newInstance.Status, merged.Status)
8585
}
8686

87+
func TestGenerateCloudInitUserDataInstallsDockerFirewallHook(t *testing.T) {
88+
script := generateCloudInitUserData("ssh-rsa test", v1.FirewallRules{})
89+
90+
assert.NotContains(t, script, "iptables-persistent")
91+
assert.NotContains(t, script, "netfilter-persistent")
92+
assert.Contains(t, script, "/usr/local/sbin/brev-apply-docker-firewall.sh")
93+
assert.Contains(t, script, "/etc/systemd/system/docker.service.d")
94+
assert.Contains(t, script, "ExecStartPost=/usr/local/sbin/brev-apply-docker-firewall.sh")
95+
assert.Contains(t, script, "sudo /usr/local/sbin/brev-apply-docker-firewall.sh || true")
96+
}
97+
98+
func TestGenerateIPTablesCommandsCreateDockerUserChainBeforeFlush(t *testing.T) {
99+
commands := generateIPTablesCommands()
100+
101+
assert.GreaterOrEqual(t, len(commands), 2)
102+
assert.Equal(t, "iptables -N DOCKER-USER 2>/dev/null || true", commands[0])
103+
assert.Equal(t, "iptables -F DOCKER-USER || true", commands[1])
104+
assert.Contains(t, strings.Join(commands, "\n"), "iptables -A DOCKER-USER -j DROP")
105+
}
106+
87107
// BenchmarkCreateInstance benchmarks the CreateInstance method
88108
func BenchmarkCreateInstance(b *testing.B) {
89109
b.Skip("CreateInstance requires real SDK initialization - use integration tests instead")

0 commit comments

Comments
 (0)