Skip to content

Commit f63e458

Browse files
committed
fix(network): preserve pre-existing root qdisc on tc-based attacks
Network attacks (delay, loss, corruption, bandwidth) on hosts where the kernel had already attached a root qdisc to the target interface (e.g. `mq` on GKE COS / EKS / AKS / RHCOS) previously failed with `NLM_F_REPLACE needed to override`. Bump action_kit_commons to pick up the `tc qdisc replace`-based apply path. Propagate the preflight warnings returned by `netfault.Apply` to the action Start result as Warn-level messages. The user sees a warning when an interface has a user-installed root qdisc (htb, cake, ...) that the kernel will not auto-restore on revert. Add an e2e test (`network delay preserves pre-existing root qdisc`) covering the two preflight branches: a veth interface with the kernel-default `noqueue` (no warning expected) and a dummy with a user-installed `htb` (warning expected). The apply path is kind- agnostic so a single case per branch is enough; parser coverage across qdisc kinds lives in netfault/preflight_test.go fixtures. Note: the test deliberately does not assert which kind the kernel attaches after `qdisc del root` — that's a kernel property dependent on device flags (IFF_NO_QUEUE) and net.core.default_qdisc, not this extension's behavior. We only assert that our injected `prio` is gone.
1 parent 110fcd1 commit f63e458

6 files changed

Lines changed: 141 additions & 5 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
- Network attacks (delay, loss, corruption, bandwidth) now work on hosts where the kernel has already attached a root qdisc to the target interface (e.g. `mq` on GKE COS / EKS / AKS / RHCOS). Previously the attack failed to start with `NLM_F_REPLACE needed to override`.
6+
- The kernel's default root qdisc (`mq`, `noqueue`, `fq_codel`, `pfifo_fast`, `fq`) is restored automatically after the attack ends. If the interface has a user-installed root qdisc (e.g. `htb`, `cake`), a warning is surfaced and the kernel default is restored on revert instead.
7+
38
## v1.5.6
49

510
- DNS Error Injection: new `hostname` parameter to restrict injection to DNS queries with matching query names (exact, case-insensitive, IDN-aware); also exposes the new `hostname_filtered` metric in the live statistics widget

e2e/integration_test.go

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ func TestWithMinikube(t *testing.T) {
192192
}, {
193193
Name: "fill memory",
194194
Test: testFillMemory,
195+
}, {
196+
Name: "network delay preserves pre-existing root qdisc",
197+
Test: testNetworkRootQdiscPreserved,
195198
},
196199
})
197200
}
@@ -674,6 +677,126 @@ func testNetworkDelay(t *testing.T, m *e2e.Minikube, e *e2e.Extension) {
674677
requireAllSidecarsCleanedUp(t, m, e)
675678
}
676679

680+
// testNetworkRootQdiscPreserved exercises the two preflight branches: an
681+
// interface whose root qdisc is in the kernel-auto-restored allowlist (no
682+
// warning expected) and one whose root is user-installed (warning expected).
683+
// The apply path (`tc qdisc replace`) is kind-agnostic so a single safe-list
684+
// case is enough; parser coverage across kinds lives in
685+
// netfault/preflight_test.go fixtures.
686+
//
687+
// What we do *not* assert: the specific kind the kernel attaches as the new
688+
// root after `qdisc del`. That's a kernel property dependent on device flags
689+
// (`IFF_NO_QUEUE`) and `net.core.default_qdisc`, not this extension's
690+
// behavior.
691+
func testNetworkRootQdiscPreserved(t *testing.T, m *e2e.Minikube, e *e2e.Extension) {
692+
tests := []struct {
693+
name string
694+
ifc string
695+
setupCmds [][]string
696+
expectWarning bool
697+
}{
698+
{
699+
name: "kernel-default qdisc (veth, noqueue) — no warning",
700+
ifc: "sb-test-veth0",
701+
setupCmds: [][]string{
702+
{"sudo", "ip", "link", "add", "sb-test-veth0", "type", "veth", "peer", "name", "sb-test-veth1"},
703+
{"sudo", "ip", "link", "set", "sb-test-veth0", "up"},
704+
},
705+
},
706+
{
707+
name: "user-installed qdisc (htb) — warning",
708+
ifc: "sb-test-htb",
709+
setupCmds: [][]string{
710+
{"sudo", "ip", "link", "add", "sb-test-htb", "type", "dummy"},
711+
{"sudo", "ip", "link", "set", "sb-test-htb", "up"},
712+
{"sudo", "tc", "qdisc", "replace", "dev", "sb-test-htb", "root", "handle", "1:", "htb", "default", "30"},
713+
},
714+
expectWarning: true,
715+
},
716+
}
717+
718+
for _, tt := range tests {
719+
t.Run(tt.name, func(t *testing.T) {
720+
for _, c := range tt.setupCmds {
721+
out, err := runInMinikube(m, c...)
722+
require.NoError(t, err, "setup command failed: %v: %s", c, string(out))
723+
}
724+
defer func() {
725+
_, _ = runInMinikube(m, "sudo", "ip", "link", "del", tt.ifc)
726+
}()
727+
728+
config := map[string]any{
729+
"duration": 20000,
730+
"networkDelay": 100,
731+
"networkDelayJitter": false,
732+
"networkInterface": []string{tt.ifc},
733+
}
734+
735+
action, err := e.RunAction(exthost.BaseActionID+".network_delay", getTarget(m), config, defaultExecutionContext)
736+
defer func() { _ = action.Cancel() }()
737+
require.NoError(t, err)
738+
739+
require.EventuallyWithT(t, func(t *assert.CollectT) {
740+
assert.Equal(t, "prio", rootQdiscKind(t, m, tt.ifc))
741+
}, 5*time.Second, 100*time.Millisecond, "attack did not install prio root qdisc")
742+
743+
gotWarning := hasWarningMatching(action.Messages(), "Pre-existing qdisc")
744+
assert.Equal(t, tt.expectWarning, gotWarning, "preflight warning expectation: got messages %+v", action.Messages())
745+
746+
require.NoError(t, action.Cancel())
747+
748+
require.EventuallyWithT(t, func(t *assert.CollectT) {
749+
assert.NotEqual(t, "prio", rootQdiscKind(t, m, tt.ifc), "attack qdisc still present after Cancel")
750+
}, 5*time.Second, 100*time.Millisecond)
751+
})
752+
}
753+
requireAllSidecarsCleanedUp(t, m, e)
754+
}
755+
756+
// rootQdiscKind returns the root qdisc kind of ifc on the minikube node, or
757+
// "" on parse/SSH failure. Takes assert.TestingT (not require.TestingT) so it
758+
// is safe to call from inside EventuallyWithT.
759+
//
760+
// Uses `tc qdisc show` (no -dev arg) and filters by interface so the parser
761+
// stays in lockstep with the production parser in netfault/preflight.go.
762+
// `tc qdisc show dev <ifc>` omits the `dev <ifc>` field from its output,
763+
// which would require a separate parser.
764+
func rootQdiscKind(t assert.TestingT, m *e2e.Minikube, ifc string) string {
765+
out, err := runInMinikube(m, "sudo", "tc", "qdisc", "show")
766+
if !assert.NoError(t, err, "tc qdisc show failed: %s", string(out)) {
767+
return ""
768+
}
769+
for _, line := range strings.Split(string(out), "\n") {
770+
fields := strings.Fields(strings.TrimSpace(line))
771+
if len(fields) < 6 || fields[0] != "qdisc" || fields[3] != "dev" || fields[4] != ifc || fields[5] != "root" {
772+
continue
773+
}
774+
return fields[1]
775+
}
776+
return ""
777+
}
778+
779+
// hasWarningMatching returns true if any Warn-level message contains ALL of
780+
// the given substrings.
781+
func hasWarningMatching(messages []action_kit_api.Message, substrs ...string) bool {
782+
for _, msg := range messages {
783+
if msg.Level == nil || *msg.Level != action_kit_api.Warn {
784+
continue
785+
}
786+
matched := true
787+
for _, s := range substrs {
788+
if !strings.Contains(msg.Message, s) {
789+
matched = false
790+
break
791+
}
792+
}
793+
if matched {
794+
return true
795+
}
796+
}
797+
return false
798+
}
799+
677800
func testNetworkDelayTcpPsh(t *testing.T, m *e2e.Minikube, e *e2e.Extension) {
678801
if m.Runtime == "cri-o" && m.Driver == "docker" {
679802
t.Skip("Due to https://github.com/kubernetes/minikube/issues/16371 this test is skipped for cri-o")

exthost/action_network.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,13 @@ func (a *networkAction) Start(ctx context.Context, state *NetworkActionState) (*
137137
},
138138
}}
139139

140-
err = netfault.Apply(ctx, runner(a.ociRuntime, state.Sidecar), opts)
140+
warnings, err := netfault.Apply(ctx, runner(a.ociRuntime, state.Sidecar), opts)
141+
for _, w := range warnings {
142+
result.Messages = new(append(*result.Messages, action_kit_api.Message{
143+
Level: extutil.Ptr(action_kit_api.Warn),
144+
Message: w,
145+
}))
146+
}
141147
if err != nil {
142148
var toomany *netfault.ErrTooManyTcCommands
143149
if errors.As(err, &toomany) {

exthost/timetravel/ntp.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ func AdjustNtpTrafficRules(ctx context.Context, runner netfault.CommandRunner, a
2020
if allowNtpTraffic {
2121
return netfault.Revert(ctx, runner, opts)
2222
} else {
23-
return netfault.Apply(ctx, runner, opts)
23+
// Blackhole does not install a root qdisc, so no preflight warnings are produced.
24+
_, err := netfault.Apply(ctx, runner, opts)
25+
return err
2426
}
2527
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ require (
1313
github.com/pkg/errors v0.9.1
1414
github.com/rs/zerolog v1.35.1
1515
github.com/steadybit/action-kit/go/action_kit_api/v2 v2.10.5
16-
github.com/steadybit/action-kit/go/action_kit_commons v1.7.0
16+
github.com/steadybit/action-kit/go/action_kit_commons v1.7.1-0.20260529125735-c0fb2339ea0a
1717
github.com/steadybit/action-kit/go/action_kit_sdk v1.3.1
1818
github.com/steadybit/action-kit/go/action_kit_test v1.4.7
1919
github.com/steadybit/discovery-kit/go/discovery_kit_api v1.7.1

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,8 @@ github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3A
163163
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
164164
github.com/steadybit/action-kit/go/action_kit_api/v2 v2.10.5 h1:WQkcNX2us3JyOrdnI3ttxX96nF2JAEQSx/zM8IQGwDo=
165165
github.com/steadybit/action-kit/go/action_kit_api/v2 v2.10.5/go.mod h1:g8gkKZCnaZaxtQseZ/L6/flv3Hutwy0xcVO7P1cbUMQ=
166-
github.com/steadybit/action-kit/go/action_kit_commons v1.7.0 h1:r1gQVsb8nf33bzWNwelsrJZB4xkgutY+eYbRpS5bi9I=
167-
github.com/steadybit/action-kit/go/action_kit_commons v1.7.0/go.mod h1:tgL+7zGBpLZ4yMaXjSZq5ezQuaZJukSWjiRTRyBcKFw=
166+
github.com/steadybit/action-kit/go/action_kit_commons v1.7.1-0.20260529125735-c0fb2339ea0a h1:25Bibg2W9KYslp9HyEvWxjNPQ10uG9uZAgVq8IVjglg=
167+
github.com/steadybit/action-kit/go/action_kit_commons v1.7.1-0.20260529125735-c0fb2339ea0a/go.mod h1:tgL+7zGBpLZ4yMaXjSZq5ezQuaZJukSWjiRTRyBcKFw=
168168
github.com/steadybit/action-kit/go/action_kit_sdk v1.3.1 h1:c83hiU+RLWjqouWR9baiidmYcTtDTdRa5rkWKFvdbc8=
169169
github.com/steadybit/action-kit/go/action_kit_sdk v1.3.1/go.mod h1:DMMqDn4QNetxAoEXSpS7xF/vV+aD6YIDl/CgWOi5ii8=
170170
github.com/steadybit/action-kit/go/action_kit_test v1.4.7 h1:DyW3xYKQTOpCy4GLOShUXYpzfTXH/JgWOcUi5WeC55k=

0 commit comments

Comments
 (0)