diff --git a/CHANGELOG.md b/CHANGELOG.md index 10d5d4a..3195824 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Unreleased + +- Network attacks (delay, loss, corruption, bandwidth) now work on hosts where the kernel has already attached a root qdisc to the target interface (e.g. `mq` on GKE COS / EKS / AKS / RHCOS). Previously the attack failed to start with `NLM_F_REPLACE needed to override`. +- The kernel's default root qdisc (`mq`, `noqueue`, `fq_codel`, `pfifo_fast`, `fq`) is restored automatically after the attack ends. If the interface has a user-installed root qdisc (e.g. `htb`, `cake`), a warning is surfaced and the kernel default is restored on revert instead. + ## v1.5.6 - DNS Error Injection: new `hostname` parameter to restrict injection to DNS queries with matching query names (exact, case-insensitive, IDN-aware); also exposes the new `hostname_filtered` metric in the live statistics widget diff --git a/e2e/integration_test.go b/e2e/integration_test.go index 3800747..bfe8f69 100644 --- a/e2e/integration_test.go +++ b/e2e/integration_test.go @@ -192,6 +192,9 @@ func TestWithMinikube(t *testing.T) { }, { Name: "fill memory", Test: testFillMemory, + }, { + Name: "network delay preserves pre-existing root qdisc", + Test: testNetworkRootQdiscPreserved, }, }) } @@ -674,6 +677,126 @@ func testNetworkDelay(t *testing.T, m *e2e.Minikube, e *e2e.Extension) { requireAllSidecarsCleanedUp(t, m, e) } +// testNetworkRootQdiscPreserved exercises the two preflight branches: an +// interface whose root qdisc is in the kernel-auto-restored allowlist (no +// warning expected) and one whose root is user-installed (warning expected). +// The apply path (`tc qdisc replace`) is kind-agnostic so a single safe-list +// case is enough; parser coverage across kinds lives in +// netfault/preflight_test.go fixtures. +// +// What we do *not* assert: the specific kind the kernel attaches as the new +// root after `qdisc del`. 
That's a kernel property dependent on device flags +// (`IFF_NO_QUEUE`) and `net.core.default_qdisc`, not this extension's +// behavior. +func testNetworkRootQdiscPreserved(t *testing.T, m *e2e.Minikube, e *e2e.Extension) { + tests := []struct { + name string + ifc string + setupCmds [][]string + expectWarning bool + }{ + { + name: "kernel-default qdisc (veth, noqueue) — no warning", + ifc: "sb-test-veth0", + setupCmds: [][]string{ + {"sudo", "ip", "link", "add", "sb-test-veth0", "type", "veth", "peer", "name", "sb-test-veth1"}, + {"sudo", "ip", "link", "set", "sb-test-veth0", "up"}, + }, + }, + { + name: "user-installed qdisc (htb) — warning", + ifc: "sb-test-htb", + setupCmds: [][]string{ + {"sudo", "ip", "link", "add", "sb-test-htb", "type", "dummy"}, + {"sudo", "ip", "link", "set", "sb-test-htb", "up"}, + {"sudo", "tc", "qdisc", "replace", "dev", "sb-test-htb", "root", "handle", "1:", "htb", "default", "30"}, + }, + expectWarning: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + for _, c := range tt.setupCmds { + out, err := runInMinikube(m, c...) 
+ require.NoError(t, err, "setup command failed: %v: %s", c, string(out)) + } + defer func() { + _, _ = runInMinikube(m, "sudo", "ip", "link", "del", tt.ifc) + }() + + config := map[string]any{ + "duration": 20000, + "networkDelay": 100, + "networkDelayJitter": false, + "networkInterface": []string{tt.ifc}, + } + + action, err := e.RunAction(exthost.BaseActionID+".network_delay", getTarget(m), config, defaultExecutionContext) + defer func() { _ = action.Cancel() }() + require.NoError(t, err) + + require.EventuallyWithT(t, func(t *assert.CollectT) { + assert.Equal(t, "prio", rootQdiscKind(t, m, tt.ifc)) + }, 5*time.Second, 100*time.Millisecond, "attack did not install prio root qdisc") + + gotWarning := hasWarningMatching(action.Messages(), "Pre-existing qdisc") + assert.Equal(t, tt.expectWarning, gotWarning, "preflight warning expectation: got messages %+v", action.Messages()) + + require.NoError(t, action.Cancel()) + + require.EventuallyWithT(t, func(t *assert.CollectT) { + assert.NotEqual(t, "prio", rootQdiscKind(t, m, tt.ifc), "attack qdisc still present after Cancel") + }, 5*time.Second, 100*time.Millisecond) + }) + } + requireAllSidecarsCleanedUp(t, m, e) +} + +// rootQdiscKind returns the root qdisc kind of ifc on the minikube node, or +// "" on parse/SSH failure. Takes assert.TestingT (not require.TestingT) so it +// is safe to call from inside EventuallyWithT. +// +// Uses `tc qdisc show` (no -dev arg) and filters by interface so the parser +// stays in lockstep with the production parser in netfault/preflight.go. +// `tc qdisc show dev IFC` omits the `dev IFC` field from its output, +// which would require a separate parser. 
+func rootQdiscKind(t assert.TestingT, m *e2e.Minikube, ifc string) string { + out, err := runInMinikube(m, "sudo", "tc", "qdisc", "show") + if !assert.NoError(t, err, "tc qdisc show failed: %s", string(out)) { + return "" + } + for _, line := range strings.Split(string(out), "\n") { + fields := strings.Fields(strings.TrimSpace(line)) + if len(fields) < 6 || fields[0] != "qdisc" || fields[3] != "dev" || fields[4] != ifc || fields[5] != "root" { + continue + } + return fields[1] + } + return "" +} + +// hasWarningMatching returns true if any Warn-level message contains ALL of +// the given substrings. +func hasWarningMatching(messages []action_kit_api.Message, substrs ...string) bool { + for _, msg := range messages { + if msg.Level == nil || *msg.Level != action_kit_api.Warn { + continue + } + matched := true + for _, s := range substrs { + if !strings.Contains(msg.Message, s) { + matched = false + break + } + } + if matched { + return true + } + } + return false +} + func testNetworkDelayTcpPsh(t *testing.T, m *e2e.Minikube, e *e2e.Extension) { if m.Runtime == "cri-o" && m.Driver == "docker" { t.Skip("Due to https://github.com/kubernetes/minikube/issues/16371 this test is skipped for cri-o") diff --git a/exthost/action_network.go b/exthost/action_network.go index 4541da1..6bb814a 100644 --- a/exthost/action_network.go +++ b/exthost/action_network.go @@ -137,7 +137,13 @@ func (a *networkAction) Start(ctx context.Context, state *NetworkActionState) (* }, }} - err = netfault.Apply(ctx, runner(a.ociRuntime, state.Sidecar), opts) + warnings, err := netfault.Apply(ctx, runner(a.ociRuntime, state.Sidecar), opts) + for _, w := range warnings { + result.Messages = new(append(*result.Messages, action_kit_api.Message{ + Level: extutil.Ptr(action_kit_api.Warn), + Message: w, + })) + } if err != nil { var toomany *netfault.ErrTooManyTcCommands if errors.As(err, &toomany) { diff --git a/exthost/timetravel/ntp.go b/exthost/timetravel/ntp.go index f09d05a..184397b 100644 --- 
a/exthost/timetravel/ntp.go +++ b/exthost/timetravel/ntp.go @@ -20,6 +20,8 @@ func AdjustNtpTrafficRules(ctx context.Context, runner netfault.CommandRunner, a if allowNtpTraffic { return netfault.Revert(ctx, runner, opts) } else { - return netfault.Apply(ctx, runner, opts) + // Blackhole does not install a root qdisc, so no preflight warnings are produced. + _, err := netfault.Apply(ctx, runner, opts) + return err } } diff --git a/go.mod b/go.mod index 708b6a9..a91497b 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/rs/zerolog v1.35.1 github.com/steadybit/action-kit/go/action_kit_api/v2 v2.10.5 - github.com/steadybit/action-kit/go/action_kit_commons v1.7.0 + github.com/steadybit/action-kit/go/action_kit_commons v1.7.1-0.20260529125735-c0fb2339ea0a github.com/steadybit/action-kit/go/action_kit_sdk v1.3.1 github.com/steadybit/action-kit/go/action_kit_test v1.4.7 github.com/steadybit/discovery-kit/go/discovery_kit_api v1.7.1 diff --git a/go.sum b/go.sum index c936744..9eae3df 100644 --- a/go.sum +++ b/go.sum @@ -163,8 +163,8 @@ github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3A github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= github.com/steadybit/action-kit/go/action_kit_api/v2 v2.10.5 h1:WQkcNX2us3JyOrdnI3ttxX96nF2JAEQSx/zM8IQGwDo= github.com/steadybit/action-kit/go/action_kit_api/v2 v2.10.5/go.mod h1:g8gkKZCnaZaxtQseZ/L6/flv3Hutwy0xcVO7P1cbUMQ= -github.com/steadybit/action-kit/go/action_kit_commons v1.7.0 h1:r1gQVsb8nf33bzWNwelsrJZB4xkgutY+eYbRpS5bi9I= -github.com/steadybit/action-kit/go/action_kit_commons v1.7.0/go.mod h1:tgL+7zGBpLZ4yMaXjSZq5ezQuaZJukSWjiRTRyBcKFw= +github.com/steadybit/action-kit/go/action_kit_commons v1.7.1-0.20260529125735-c0fb2339ea0a h1:25Bibg2W9KYslp9HyEvWxjNPQ10uG9uZAgVq8IVjglg= +github.com/steadybit/action-kit/go/action_kit_commons v1.7.1-0.20260529125735-c0fb2339ea0a/go.mod 
h1:tgL+7zGBpLZ4yMaXjSZq5ezQuaZJukSWjiRTRyBcKFw= github.com/steadybit/action-kit/go/action_kit_sdk v1.3.1 h1:c83hiU+RLWjqouWR9baiidmYcTtDTdRa5rkWKFvdbc8= github.com/steadybit/action-kit/go/action_kit_sdk v1.3.1/go.mod h1:DMMqDn4QNetxAoEXSpS7xF/vV+aD6YIDl/CgWOi5ii8= github.com/steadybit/action-kit/go/action_kit_test v1.4.7 h1:DyW3xYKQTOpCy4GLOShUXYpzfTXH/JgWOcUi5WeC55k=