Skip to content

Commit 9ad7754

Browse files
committed
fix(netfault): replace root qdisc instead of add to support pre-existing qdiscs
Network attacks using tc (delay, loss, corruption, bandwidth) failed on hosts where the kernel had already attached a root qdisc to the target interface (e.g. `mq` on GKE COS, EKS, AKS). `tc qdisc add ... root` returned `NLM_F_REPLACE needed to override` and the attack could not start. Switch the root qdisc command to `tc qdisc replace ... root` on apply. On revert we still `qdisc del root`; the kernel then re-attaches its default qdisc (`mq` on multi-queue devices, `noqueue` on veth, otherwise the configured `net.core.default_qdisc`), so common cloud node setups are restored to their pre-attack state. Add a preflight inspection that runs `tc qdisc show` on each affected interface and emits a warning if the root qdisc is not one the kernel auto-restores (anything other than mq, noqueue, pfifo_fast, fq_codel, fq). Callers receive the warnings via the new `Apply` return value. Breaking change: `netfault.Apply` now returns `([]string, error)`.
1 parent 9ad8185 commit 9ad7754

18 files changed

Lines changed: 417 additions & 15 deletions

go/action_kit_commons/CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
# Changelog
22

3+
## 1.8.0
4+
5+
- netfault: use `tc qdisc replace` (instead of `add`) for the root qdisc in
6+
delay/loss/corruption/bandwidth attacks so they no longer fail on hosts
7+
with a pre-existing root qdisc (e.g. `mq` on GKE COS / EKS / AKS).
8+
- netfault: add preflight check that warns when an interface has a
9+
user-installed root qdisc (anything other than `mq`, `noqueue`,
10+
`pfifo_fast`, `fq_codel`, `fq`); after revert the kernel default qdisc is
11+
attached in that case, not the original user-installed one.
12+
- **Breaking:** `netfault.Apply` now returns `([]string, error)` — the
13+
string slice contains preflight warnings to surface to the user.
14+
315
## 1.6.1
416

517
- Add UseMangleChain to TcpResetOpts to enable tcp reset on istio

go/action_kit_commons/network/netfault/bandwidth.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ func (o *LimitBandwidthOpts) ipCommands(_ family, _ mode) ([]string, error) {
5050
return nil, nil
5151
}
5252

53+
func (o *LimitBandwidthOpts) affectedInterfaces() []string {
54+
return o.Interfaces
55+
}
56+
5357
func (o *LimitBandwidthOpts) tcCommands(mode mode) ([]string, error) {
5458
var cmds []string
5559

@@ -63,7 +67,7 @@ func (o *LimitBandwidthOpts) tcCommands(mode mode) ([]string, error) {
6367

6468
filter := optimizeFilter(o.Filter)
6569
for _, ifc := range o.Interfaces {
66-
cmds = append(cmds, fmt.Sprintf("qdisc %s dev %s root handle 1: htb default 30", mode, ifc))
70+
cmds = append(cmds, fmt.Sprintf("qdisc %s dev %s root handle 1: htb default 30", rootQdiscVerb(mode), ifc))
6771
cmds = append(cmds, fmt.Sprintf("class %s dev %s parent 1: classid %s htb rate %s", mode, ifc, handleInclude, o.Bandwidth))
6872

6973
filterCmds, err := tcCommandsForFilter(mode, filter, ifc)

go/action_kit_commons/network/netfault/bandwidth_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ func TestLimitBandwidthOpts_TcCommands(t *testing.T) {
6161
Bandwidth: "100mbit",
6262
Interfaces: []string{"eth0"},
6363
},
64-
wantAdd: []byte(`qdisc add dev eth0 root handle 1: htb default 30
64+
wantAdd: []byte(`qdisc replace dev eth0 root handle 1: htb default 30
6565
class add dev eth0 parent 1: classid 1:3 htb rate 100mbit
6666
filter add dev eth0 protocol ip parent 1: prio 1 u32 match ip src 192.168.2.1/32 match ip sport 80 0xffff flowid 1:1
6767
filter add dev eth0 protocol ip parent 1: prio 2 u32 match ip dst 192.168.2.1/32 match ip dport 80 0xffff flowid 1:1

go/action_kit_commons/network/netfault/blackhole.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ func (o *BlackholeOpts) tcCommands(_ mode) ([]string, error) {
8787
return nil, nil
8888
}
8989

90+
func (o *BlackholeOpts) affectedInterfaces() []string {
91+
return nil
92+
}
93+
9094
func (o *BlackholeOpts) String() string {
9195
var sb strings.Builder
9296
sb.WriteString("blocking traffic ")

go/action_kit_commons/network/netfault/delay.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ func (o *DelayOpts) ipCommands(_ family, _ mode) ([]string, error) {
6363
return nil, nil
6464
}
6565

66+
func (o *DelayOpts) affectedInterfaces() []string {
67+
return o.Interfaces
68+
}
69+
6670
const steadybitDelayFwMark uint32 = 0x1
6771

6872
func (o *DelayOpts) iptablesScripts(mode mode) ([]string, []string, error) {
@@ -174,7 +178,7 @@ func (o *DelayOpts) tcCommands(mode mode) ([]string, error) {
174178

175179
filter := optimizeFilter(o.Filter)
176180
for _, ifc := range o.Interfaces {
177-
cmds = append(cmds, fmt.Sprintf("qdisc %s dev %s root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", mode, ifc))
181+
cmds = append(cmds, fmt.Sprintf("qdisc %s dev %s root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", rootQdiscVerb(mode), ifc))
178182
cmds = append(cmds, fmt.Sprintf("qdisc %s dev %s parent %s handle 30: netem delay %dms %dms", mode, ifc, handleInclude, o.Delay.Milliseconds(), o.Jitter.Milliseconds()))
179183

180184
if o.TcpPshOnly {

go/action_kit_commons/network/netfault/delay_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func TestDelayOpts_TcCommands(t *testing.T) {
5555
Jitter: 10 * time.Millisecond,
5656
Interfaces: []string{"eth0"},
5757
},
58-
wantAdd: []byte(`qdisc add dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
58+
wantAdd: []byte(`qdisc replace dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5959
qdisc add dev eth0 parent 1:3 handle 30: netem delay 100ms 10ms
6060
filter add dev eth0 protocol ip parent 1: prio 1 u32 match ip src 192.168.2.1/32 match ip sport 80 0xffff flowid 1:1
6161
filter add dev eth0 protocol ip parent 1: prio 2 u32 match ip dst 192.168.2.1/32 match ip dport 80 0xffff flowid 1:1
@@ -112,7 +112,7 @@ qdisc del dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
112112
Interfaces: []string{"eth0"},
113113
TcpPshOnly: true,
114114
},
115-
wantAdd: []byte(`qdisc add dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
115+
wantAdd: []byte(`qdisc replace dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
116116
qdisc add dev eth0 parent 1:3 handle 30: netem delay 100ms 10ms
117117
filter add dev eth0 protocol ip parent 1: prio 1 handle 0x1 fw flowid 1:3
118118
filter add dev eth0 protocol ipv6 parent 1: prio 2 handle 0x1 fw flowid 1:3
@@ -141,7 +141,7 @@ qdisc del dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
141141
Jitter: 10 * time.Millisecond,
142142
Interfaces: []string{"eth0"},
143143
},
144-
wantAdd: []byte(`qdisc add dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
144+
wantAdd: []byte(`qdisc replace dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
145145
qdisc add dev eth0 parent 1:3 handle 30: netem delay 100ms 10ms
146146
filter add dev eth0 protocol ip parent 1: prio 1 u32 match ip src 192.168.2.1/32 match ip sport 80 0xffff flowid 1:1
147147
filter add dev eth0 protocol ip parent 1: prio 2 u32 match ip dst 192.168.2.1/32 match ip dport 80 0xffff flowid 1:1

go/action_kit_commons/network/netfault/netfault.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,22 @@ type CommandRunner interface {
4242
id() string
4343
}
4444

45-
func Apply(ctx context.Context, runner CommandRunner, opts Opts) error {
46-
return generateAndRunCommands(ctx, runner, opts, modeAdd)
45+
// Apply installs the attack described by opts. It returns warnings about
46+
// pre-existing root qdiscs that will be overwritten and not fully restored
47+
// on revert; callers should surface these to the user. An error indicates
48+
// that one or more apply steps failed; the warnings are still returned then.
49+
func Apply(ctx context.Context, runner CommandRunner, opts Opts) ([]string, error) {
50+
warnings := preflightWarnings(ctx, runner, opts.affectedInterfaces())
51+
if err := generateAndRunCommands(ctx, runner, opts, modeAdd); err != nil {
52+
return warnings, err
53+
}
54+
return warnings, nil
4755
}
4856

57+
// Revert removes the attack described by opts. After `qdisc del root` the
58+
// kernel auto-attaches the device's default qdisc (mq/noqueue/fq_codel/...).
59+
// A pre-existing root qdisc of a kind not in safeRootQdiscKinds will not be restored to its original
60+
// configuration; the user was warned during Apply.
4961
func Revert(ctx context.Context, runner CommandRunner, opts Opts) error {
5062
return generateAndRunCommands(ctx, runner, opts, modeDelete)
5163
}

go/action_kit_commons/network/netfault/netfault_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func TestApply_Order_IptablesBeforeTcWhenTcpPshOnly(t *testing.T) {
4545
}
4646

4747
r := &fakeRunner{}
48-
err := Apply(context.Background(), r, opts)
48+
_, err := Apply(context.Background(), r, opts)
4949
assert.NoError(t, err)
5050

5151
iptablesIdx := -1
@@ -54,7 +54,7 @@ func TestApply_Order_IptablesBeforeTcWhenTcpPshOnly(t *testing.T) {
5454
if len(c.args) > 0 && c.args[0] == "iptables-restore" {
5555
iptablesIdx = i
5656
}
57-
if len(c.args) > 0 && c.args[0] == "tc" && len(c.cmds) > 0 && strings.HasPrefix(c.cmds[0], "qdisc add") {
57+
if len(c.args) > 0 && c.args[0] == "tc" && len(c.cmds) > 0 && strings.HasPrefix(c.cmds[0], "qdisc replace") {
5858
tcBatchIdx = i
5959
}
6060
}

go/action_kit_commons/network/netfault/packageCorruption.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,16 @@ func (o *CorruptPackagesOpts) ipCommands(_ family, _ mode) ([]string, error) {
4949
return nil, nil
5050
}
5151

52+
func (o *CorruptPackagesOpts) affectedInterfaces() []string {
53+
return o.Interfaces
54+
}
55+
5256
func (o *CorruptPackagesOpts) tcCommands(mode mode) ([]string, error) {
5357
var cmds []string
5458

5559
filter := optimizeFilter(o.Filter)
5660
for _, ifc := range o.Interfaces {
57-
cmds = append(cmds, fmt.Sprintf("qdisc %s dev %s root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", mode, ifc))
61+
cmds = append(cmds, fmt.Sprintf("qdisc %s dev %s root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", rootQdiscVerb(mode), ifc))
5862
cmds = append(cmds, fmt.Sprintf("qdisc %s dev %s parent %s handle 30: netem corrupt %d%%", mode, ifc, handleInclude, o.Corruption))
5963

6064
filterCmds, err := tcCommandsForFilter(mode, filter, ifc)

go/action_kit_commons/network/netfault/packageCorruption_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ func TestCorruptPackagesOpts_TcCommands(t *testing.T) {
3838
Corruption: 90,
3939
Interfaces: []string{"eth0"},
4040
},
41-
wantAdd: []byte(`qdisc add dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
41+
wantAdd: []byte(`qdisc replace dev eth0 root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4242
qdisc add dev eth0 parent 1:3 handle 30: netem corrupt 90%
4343
filter add dev eth0 protocol ip parent 1: prio 1 u32 match ip src 192.168.2.1/32 match ip sport 80 0xffff flowid 1:1
4444
filter add dev eth0 protocol ip parent 1: prio 2 u32 match ip dst 192.168.2.1/32 match ip dport 80 0xffff flowid 1:1

0 commit comments

Comments
 (0)