Skip to content

Commit a71d565

Browse files
authored
feat(tests): daemon roundtrip smoke — handshake + send-message (PILOT-163) (#228)
* feat(tests): daemon roundtrip smoke (PILOT-163) Adds TestDaemonRoundtripHandshakeAndSendMessage to tests/, which boots two daemons with encryption enabled, performs a mutual handshake (auto-approved on both sides), listens on B, dials from A, writes a payload over the encrypted tunnel, and verifies B reads back the same bytes. Runs in regular CI (no nightly build tag) and completes in sub-second so the per-PR cost is negligible. Replaces the pre-existing canary-side smoke that only probed /api/stats — that path could green-light a build whose protocol layer was silently broken; this test catches it. What's NOT covered (deliberate, has dedicated tests): NAT traversal, long-running rekey, registry failover, plugin shutdown. * fix(tests): drop dead dialCtx/dialCancel left over from refactor
1 parent 3b5e4f9 commit a71d565

1 file changed

Lines changed: 146 additions & 0 deletions

File tree

tests/zz_roundtrip_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// SPDX-License-Identifier: AGPL-3.0-or-later
2+
3+
package tests
4+
5+
import (
6+
"bytes"
7+
"testing"
8+
"time"
9+
10+
"github.com/TeoSlayer/pilotprotocol/pkg/daemon"
11+
)
12+
13+
// TestDaemonRoundtripHandshakeAndSendMessage (PILOT-163) is the smoke
14+
// regression gate that every PR runs in the standard test step. It
15+
// proves the protocol's happy path — handshake establishment, mutual
16+
// trust, then a port listen + dial + write + accept + read — wires
17+
// together end-to-end at every commit.
18+
//
19+
// Prior CI smoke only probed /api/stats and counted pod readiness;
20+
// it would have green-lighted a build whose protocol layer was
21+
// silently broken. This test runs in <500 ms in regular CI (no
22+
// nightly build tag) so the cost of catching such breakage early is
23+
// negligible.
24+
//
25+
// What's covered:
26+
// - Daemon boot (env.AddDaemon × 2 with encryption on)
27+
// - drv.Handshake() round-trip and PendingHandshakes/TrustedPeers
28+
// reaching mutual-trust state
29+
// - drv.Listen() port-binding via the driver socket
30+
// - drv.DialAddr() + Conn.Write() over the encrypted tunnel
31+
// - Listener.Accept() + Conn.Read() recovering the same payload bytes
32+
//
33+
// What's NOT covered (deliberate): NAT traversal (loopback only),
34+
// long-running rekey, registry failover, plugin shutdown. Those have
35+
// dedicated tests; this one is the smoke gate.
36+
func TestDaemonRoundtripHandshakeAndSendMessage(t *testing.T) {
37+
t.Parallel()
38+
39+
env := NewTestEnv(t)
40+
a := env.AddDaemon(func(c *daemon.Config) {
41+
c.Encrypt = true
42+
})
43+
b := env.AddDaemon(func(c *daemon.Config) {
44+
c.Encrypt = true
45+
})
46+
47+
t.Logf("daemon A: node=%d addr=%s", a.Daemon.NodeID(), a.Daemon.Addr())
48+
t.Logf("daemon B: node=%d addr=%s", b.Daemon.NodeID(), b.Daemon.Addr())
49+
50+
// Mutual handshake — both directions auto-approve as a pair.
51+
if _, err := a.Driver.Handshake(b.Daemon.NodeID(), "PILOT-163 roundtrip"); err != nil {
52+
t.Fatalf("A handshake to B: %v", err)
53+
}
54+
// Wait for B to see A's request in pending before B initiates back —
55+
// avoids a race where B's request lands first and A then rejects on
56+
// the duplicate-request guard.
57+
if !waitFor(2*time.Second, func() bool {
58+
p, err := b.Driver.PendingHandshakes()
59+
if err != nil {
60+
return false
61+
}
62+
list, _ := p["pending"].([]interface{})
63+
return len(list) > 0
64+
}) {
65+
t.Fatal("A's handshake never reached B (pending list stayed empty)")
66+
}
67+
if _, err := b.Driver.Handshake(a.Daemon.NodeID(), "PILOT-163 roundtrip reply"); err != nil {
68+
t.Fatalf("B handshake to A: %v", err)
69+
}
70+
71+
if !waitFor(3*time.Second, func() bool {
72+
ta, errA := a.Driver.TrustedPeers()
73+
tb, errB := b.Driver.TrustedPeers()
74+
if errA != nil || errB != nil {
75+
return false
76+
}
77+
la, _ := ta["trusted"].([]interface{})
78+
lb, _ := tb["trusted"].([]interface{})
79+
return len(la) > 0 && len(lb) > 0
80+
}) {
81+
t.Fatal("mutual trust state never reached within 3s")
82+
}
83+
84+
// Listen on B, dial-and-write from A, read back on B.
85+
const port = uint16(7163)
86+
const payloadStr = "PILOT-163 roundtrip payload"
87+
ln, err := b.Driver.Listen(port)
88+
if err != nil {
89+
t.Fatalf("B listen on %d: %v", port, err)
90+
}
91+
t.Cleanup(func() { _ = ln.Close() })
92+
93+
// Accept in a goroutine so we don't block the dial.
94+
type recvResult struct {
95+
data []byte
96+
err error
97+
}
98+
recv := make(chan recvResult, 1)
99+
go func() {
100+
conn, err := ln.Accept()
101+
if err != nil {
102+
recv <- recvResult{err: err}
103+
return
104+
}
105+
defer conn.Close()
106+
_ = conn.SetReadDeadline(time.Now().Add(2 * time.Second))
107+
buf := make([]byte, 4096)
108+
n, err := conn.Read(buf)
109+
recv <- recvResult{data: buf[:n], err: err}
110+
}()
111+
112+
conn, err := a.Driver.DialAddrTimeout(b.Daemon.Addr(), port, 2*time.Second)
113+
if err != nil {
114+
t.Fatalf("A dial B:%d: %v", port, err)
115+
}
116+
if _, err := conn.Write([]byte(payloadStr)); err != nil {
117+
_ = conn.Close()
118+
t.Fatalf("A write: %v", err)
119+
}
120+
_ = conn.Close()
121+
122+
select {
123+
case r := <-recv:
124+
if r.err != nil {
125+
t.Fatalf("B read: %v", r.err)
126+
}
127+
if !bytes.Equal(r.data, []byte(payloadStr)) {
128+
t.Errorf("B got %q, want %q", string(r.data), payloadStr)
129+
}
130+
case <-time.After(3 * time.Second):
131+
t.Fatal("B never received the message")
132+
}
133+
}
134+
135+
// waitFor polls cond roughly every 25 ms until it returns true or timeout
// elapses, and reports whether cond was observed true.
//
// cond is always evaluated at least once, even when timeout is zero or
// negative, and once more when the deadline is reached, so a condition
// that turns true during the last sleep is not reported as a timeout.
// Each sleep is capped at the time remaining so the call does not overrun
// the deadline by a full poll interval.
func waitFor(timeout time.Duration, cond func() bool) bool {
	const pollInterval = 25 * time.Millisecond

	deadline := time.Now().Add(timeout)
	for {
		if cond() {
			return true
		}
		remaining := time.Until(deadline)
		if remaining <= 0 {
			return false
		}
		if remaining > pollInterval {
			remaining = pollInterval
		}
		time.Sleep(remaining)
	}
}

0 commit comments

Comments
 (0)