|
| 1 | +// SPDX-License-Identifier: AGPL-3.0-or-later |
| 2 | + |
| 3 | +package keyexchange_test |
| 4 | + |
| 5 | +// Regression for the same-session log spam observed against list-agents |
| 6 | +// on 2026-05-29: a fresh daemon registers, completes the initial PILA |
| 7 | +// exchange (1 install, 1 postInstall, 1 "encrypted tunnel established" |
| 8 | +// at Info), then the peer keeps retransmitting the SAME PILA at ~8 s |
| 9 | +// cadence while the relayed data plane drops our PILS replies. Every |
| 10 | +// such retransmit lands past DuplicateHandshakeDebounce (250 ms), so |
| 11 | +// the duplicate gate doesn't catch it, but it carries the same X25519 |
| 12 | +// ephemeral so hadCrypto=true && keyChanged=false — structurally no |
| 13 | +// new install. Pre-fix the daemon logged "encrypted tunnel established" |
| 14 | +// at Info every time (35 events per peer per 5 min in field observation). |
| 15 | +// |
| 16 | +// The fix demotes the log to Debug for the same-session case while |
| 17 | +// preserving: |
| 18 | +// |
| 19 | +// - the existing duplicate-within-debounce coalescing |
| 20 | +// (TestDuplicatePILACoalescedSuppressesLogAndHook) |
| 21 | +// |
| 22 | +// - the past-debounce postInstall hook firing for endpoint refresh |
| 23 | +// (TestDuplicatePILAOutsideDebounceFiresHookAgain pins hook count = 2) |
| 24 | +// |
| 25 | +// - the asymmetric-recovery reply on stale inbound |
| 26 | +// (TestDuplicatePILAStillRepliesForAsymmetricRecovery) |
| 27 | + |
import (
	"bytes"
	"log"
	"log/slog"
	"net"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/TeoSlayer/pilotprotocol/pkg/daemon/keyexchange"
)
| 40 | + |
// syncWriter is an io.Writer that takes a mutex around every write to
// the underlying bytes.Buffer, so slog handlers invoked from several
// goroutines cannot race each other on the buffer. Both fields are
// pointers: the type has a value receiver, and every copy of the value
// still locks the same mutex and appends to the same buffer.
type syncWriter struct {
	w  *bytes.Buffer // destination of all writes
	mu *sync.Mutex   // held for the duration of each Write
}

// Write appends p to the buffer while holding the mutex and reports
// whatever the buffer's own Write reports.
func (sw syncWriter) Write(p []byte) (n int, err error) {
	sw.mu.Lock()
	// Deferred so the lock is released even if the buffer write panics.
	defer sw.mu.Unlock()
	n, err = sw.w.Write(p)
	return n, err
}
| 53 | + |
| 54 | +// captureSlog redirects slog.Default to a buffer at the given level |
| 55 | +// and returns a buffer-content snapshot + a restore func. NOT safe to |
| 56 | +// use with t.Parallel() — slog.Default is process-global and parallel |
| 57 | +// tests racing SetDefault see each other's handlers. |
| 58 | +func captureSlog(t *testing.T, level slog.Level) (snapshot func() string, restore func()) { |
| 59 | + t.Helper() |
| 60 | + var ( |
| 61 | + buf bytes.Buffer |
| 62 | + mu sync.Mutex |
| 63 | + ) |
| 64 | + handler := slog.NewTextHandler(syncWriter{w: &buf, mu: &mu}, &slog.HandlerOptions{Level: level}) |
| 65 | + prev := slog.Default() |
| 66 | + slog.SetDefault(slog.New(handler)) |
| 67 | + return func() string { |
| 68 | + mu.Lock() |
| 69 | + defer mu.Unlock() |
| 70 | + return buf.String() |
| 71 | + }, func() { |
| 72 | + slog.SetDefault(prev) |
| 73 | + } |
| 74 | +} |
| 75 | + |
| 76 | +// TestSameSessionPILASuppressesInfoButFiresHookAndDebug pins the fix |
| 77 | +// end-to-end: |
| 78 | +// |
| 79 | +// - First PILA: Info "encrypted tunnel established", hook count = 1. |
| 80 | +// |
| 81 | +// - Second same-key PILA past the debounce window: |
| 82 | +// |
| 83 | +// - hook still fires (count = 2, pinned for endpoint refresh by |
| 84 | +// TestDuplicatePILAOutsideDebounceFiresHookAgain). |
| 85 | +// |
| 86 | +// - NO second "encrypted tunnel established" at Info — that was the |
| 87 | +// spam pre-fix. |
| 88 | +// |
| 89 | +// - Debug-level "same-session keepalive" present (diagnostic |
| 90 | +// remains available for operators tracing key-exchange flow). |
| 91 | +// |
| 92 | +// The two slog assertions share one capture buffer (and therefore one |
| 93 | +// SetDefault call) because parallel-test races on slog.Default would |
| 94 | +// otherwise tear the captured output. The test itself is NOT marked |
| 95 | +// t.Parallel(). |
| 96 | +func TestSameSessionPILASuppressesInfoButFiresHookAndDebug(t *testing.T) { |
| 97 | + a := newPeer(t, 510) |
| 98 | + b := newPeer(t, 511) |
| 99 | + crossWireVerifyFuncs(a, b) |
| 100 | + a.mgr.SetSender(func(uint32, *net.UDPAddr, []byte) error { return nil }) |
| 101 | + |
| 102 | + var hookCount atomic.Int32 |
| 103 | + a.mgr.SetPostInstallHook(func(keyexchange.PostInstallEvent) { |
| 104 | + hookCount.Add(1) |
| 105 | + }) |
| 106 | + |
| 107 | + bFrame := b.mgr.BuildAuthFrame() |
| 108 | + if bFrame == nil { |
| 109 | + t.Fatalf("BuildAuthFrame returned nil") |
| 110 | + } |
| 111 | + from := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 4000} |
| 112 | + |
| 113 | + // Capture at Debug so both the Info "established" on the first |
| 114 | + // arrival and the Debug "same-session keepalive" on the second |
| 115 | + // arrival land in the same buffer. |
| 116 | + logSnap, restore := captureSlog(t, slog.LevelDebug) |
| 117 | + defer restore() |
| 118 | + |
| 119 | + // --- First PILA: full install path --------------------------------- |
| 120 | + |
| 121 | + if !a.mgr.HandleAuthFrame(bFrame[4:], from, false) { |
| 122 | + t.Fatalf("first PILA rejected") |
| 123 | + } |
| 124 | + if got := hookCount.Load(); got != 1 { |
| 125 | + t.Fatalf("after first PILA: hook count = %d, want 1", got) |
| 126 | + } |
| 127 | + initialLog := logSnap() |
| 128 | + if !strings.Contains(initialLog, "encrypted tunnel established") { |
| 129 | + t.Fatalf("first PILA: expected Info log 'encrypted tunnel established', got:\n%s", initialLog) |
| 130 | + } |
| 131 | + initialEstablishedCount := strings.Count(initialLog, "encrypted tunnel established") |
| 132 | + if initialEstablishedCount != 1 { |
| 133 | + t.Fatalf("first PILA: expected exactly 1 'established' log line, got %d:\n%s", |
| 134 | + initialEstablishedCount, initialLog) |
| 135 | + } |
| 136 | + |
| 137 | + // --- Second PILA past debounce: keepalive, no spam ----------------- |
| 138 | + |
| 139 | + time.Sleep(keyexchange.DuplicateHandshakeDebounce + 100*time.Millisecond) |
| 140 | + |
| 141 | + if !a.mgr.HandleAuthFrame(bFrame[4:], from, false) { |
| 142 | + t.Fatalf("same-session PILA rejected") |
| 143 | + } |
| 144 | + |
| 145 | + // Endpoint-refresh contract from TestDuplicatePILAOutsideDebounceFiresHookAgain. |
| 146 | + if got := hookCount.Load(); got != 2 { |
| 147 | + t.Fatalf("after same-session PILA: hook count = %d, want 2 (must still refresh endpoint)", got) |
| 148 | + } |
| 149 | + |
| 150 | + finalLog := logSnap() |
| 151 | + |
| 152 | + // The new behaviour: the Info "established" count stays at one |
| 153 | + // (no spam from the second arrival). |
| 154 | + finalEstablishedCount := strings.Count(finalLog, "encrypted tunnel established") |
| 155 | + if finalEstablishedCount != 1 { |
| 156 | + t.Fatalf("after same-session PILA: 'established' log count = %d, want 1 (no Info spam):\n%s", |
| 157 | + finalEstablishedCount, finalLog) |
| 158 | + } |
| 159 | + |
| 160 | + // The Debug diagnostic remains so operators can see the keepalive. |
| 161 | + if !strings.Contains(finalLog, "same-session keepalive") { |
| 162 | + t.Fatalf("after same-session PILA: expected Debug log 'same-session keepalive'; got:\n%s", |
| 163 | + finalLog) |
| 164 | + } |
| 165 | +} |
0 commit comments