Skip to content

Commit 0c203e9

Browse files
authored
Merge pull request #71 from stacklok/jaosorior/egress-dns-correctness
egress: tighten DNS answer filtering and TTL clamping
2 parents df4d4a7 + 602528d commit 0c203e9

File tree

4 files changed

+249
-6
lines changed

4 files changed

+249
-6
lines changed

net/egress/dns.go

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,21 @@ package egress
66
import (
77
"fmt"
88
"net"
9+
"strings"
910

1011
"github.com/miekg/dns"
1112
)
1213

14+
// normalizeDNSName returns s lower-cased with a single trailing dot stripped,
15+
// so that names from DNS wire format (often FQDN with trailing dot) compare
16+
// consistently against caller-supplied policy names (typically without a
17+
// trailing dot). DNS names are case-insensitive by RFC 1035.
18+
func normalizeDNSName(s string) string {
19+
s = strings.ToLower(s)
20+
s = strings.TrimSuffix(s, ".")
21+
return s
22+
}
23+
1324
// ParseDNSQuery extracts the transaction ID, question name, and query type
1425
// from a DNS query payload (UDP payload, not including Ethernet/IP/UDP headers).
1526
func ParseDNSQuery(payload []byte) (txnID uint16, qname string, qtype uint16, err error) {
@@ -52,13 +63,48 @@ func BuildNXDOMAIN(txnID uint16, qname string, qtype uint16) ([]byte, error) {
5263

5364
// ParseDNSResponse extracts the question name, A-record IPs, and minimum
5465
// TTL from a DNS response payload.
66+
//
67+
// Only A records whose owner name is reachable from the question via the
68+
// response's own CNAME chain are returned. An answer with an unrelated
69+
// owner name — for example, an injected out-of-bailiwick record — is
70+
// discarded. Without this filter, a compromised allowed zone could
71+
// smuggle arbitrary IPs into dynamic egress rules by returning them in
72+
// the Answer section under any name.
73+
//
74+
// The returned qname is normalized (lower-cased, trailing dot stripped).
5575
func ParseDNSResponse(payload []byte) (qname string, ips []net.IP, ttl uint32, err error) {
5676
var msg dns.Msg
5777
if err := msg.Unpack(payload); err != nil {
5878
return "", nil, 0, fmt.Errorf("unpack DNS response: %w", err)
5979
}
6080
if len(msg.Question) > 0 {
61-
qname = msg.Question[0].Name
81+
qname = normalizeDNSName(msg.Question[0].Name)
82+
}
83+
84+
// Build the set of owner names reachable from qname via CNAMEs in this
85+
// response. Multi-pass until no new names are added; bounded by the
86+
// Answer count so CNAME loops cannot cause infinite iteration.
87+
validNames := map[string]struct{}{qname: {}}
88+
for i := 0; i < len(msg.Answer); i++ {
89+
progress := false
90+
for _, rr := range msg.Answer {
91+
cn, ok := rr.(*dns.CNAME)
92+
if !ok {
93+
continue
94+
}
95+
owner := normalizeDNSName(cn.Hdr.Name)
96+
if _, have := validNames[owner]; !have {
97+
continue
98+
}
99+
target := normalizeDNSName(cn.Target)
100+
if _, already := validNames[target]; !already {
101+
validNames[target] = struct{}{}
102+
progress = true
103+
}
104+
}
105+
if !progress {
106+
break
107+
}
62108
}
63109

64110
var minTTL uint32
@@ -68,6 +114,9 @@ func ParseDNSResponse(payload []byte) (qname string, ips []net.IP, ttl uint32, e
68114
if !ok {
69115
continue
70116
}
117+
if _, ok := validNames[normalizeDNSName(a.Hdr.Name)]; !ok {
118+
continue
119+
}
71120
ips = append(ips, a.A)
72121
if first || a.Hdr.Ttl < minTTL {
73122
minTTL = a.Hdr.Ttl

net/egress/dns_test.go

Lines changed: 127 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,31 @@ import (
1212
"github.com/stretchr/testify/require"
1313
)
1414

15+
func TestNormalizeDNSName(t *testing.T) {
16+
t.Parallel()
17+
18+
tests := []struct {
19+
in, want string
20+
}{
21+
{"example.com", "example.com"},
22+
{"example.com.", "example.com"},
23+
{"Example.COM.", "example.com"},
24+
{"API.GitHub.com", "api.github.com"},
25+
{"", ""},
26+
{".", ""},
27+
// Only a single trailing dot is stripped; double-trailing is a
28+
// malformed name and should not be coerced.
29+
{"foo..", "foo."},
30+
}
31+
32+
for _, tt := range tests {
33+
t.Run(tt.in, func(t *testing.T) {
34+
t.Parallel()
35+
assert.Equal(t, tt.want, normalizeDNSName(tt.in))
36+
})
37+
}
38+
}
39+
1540
func TestParseDNSQuery(t *testing.T) {
1641
t.Parallel()
1742

@@ -121,13 +146,114 @@ func TestParseDNSResponse(t *testing.T) {
121146

122147
qname, ips, ttl, err := ParseDNSResponse(payload)
123148
require.NoError(t, err)
124-
assert.Equal(t, "example.com.", qname)
149+
assert.Equal(t, "example.com", qname)
125150
assert.Len(t, ips, 2)
126151
assert.Equal(t, "93.184.216.34", ips[0].String())
127152
assert.Equal(t, "93.184.216.35", ips[1].String())
128153
assert.Equal(t, uint32(60), ttl) // minimum TTL
129154
}
130155

156+
func TestParseDNSResponse_DropsOutOfBailiwickAnswers(t *testing.T) {
157+
t.Parallel()
158+
159+
// Question for example.com; attacker-controlled response slips an A
160+
// record with a different owner name (typical out-of-bailiwick injection
161+
// attempt to smuggle an internal IP into dynamic rules).
162+
msg := &dns.Msg{
163+
MsgHdr: dns.MsgHdr{Id: 0x1234, Response: true},
164+
Question: []dns.Question{
165+
{Name: "example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET},
166+
},
167+
Answer: []dns.RR{
168+
&dns.A{
169+
Hdr: dns.RR_Header{Name: "example.com.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 300},
170+
A: net.ParseIP("93.184.216.34"),
171+
},
172+
&dns.A{
173+
Hdr: dns.RR_Header{Name: "internal.local.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 300},
174+
A: net.ParseIP("169.254.169.254"),
175+
},
176+
},
177+
}
178+
payload, err := msg.Pack()
179+
require.NoError(t, err)
180+
181+
qname, ips, _, err := ParseDNSResponse(payload)
182+
require.NoError(t, err)
183+
assert.Equal(t, "example.com", qname)
184+
require.Len(t, ips, 1)
185+
assert.Equal(t, "93.184.216.34", ips[0].String())
186+
}
187+
188+
func TestParseDNSResponse_FollowsCNAMEChain(t *testing.T) {
189+
t.Parallel()
190+
191+
// Legitimate CNAME chain: example.com -> cdn.example.net -> 1.2.3.4.
192+
// The A record's owner matches the CNAME target, which is reachable
193+
// from the question via the chain, so the IP is accepted.
194+
msg := &dns.Msg{
195+
MsgHdr: dns.MsgHdr{Id: 0x2345, Response: true},
196+
Question: []dns.Question{
197+
{Name: "example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET},
198+
},
199+
Answer: []dns.RR{
200+
&dns.CNAME{
201+
Hdr: dns.RR_Header{Name: "example.com.", Rrtype: dns.TypeCNAME, Class: dns.ClassINET, Ttl: 300},
202+
Target: "cdn.example.net.",
203+
},
204+
&dns.A{
205+
Hdr: dns.RR_Header{Name: "cdn.example.net.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 300},
206+
A: net.ParseIP("1.2.3.4"),
207+
},
208+
},
209+
}
210+
payload, err := msg.Pack()
211+
require.NoError(t, err)
212+
213+
qname, ips, _, err := ParseDNSResponse(payload)
214+
require.NoError(t, err)
215+
assert.Equal(t, "example.com", qname)
216+
require.Len(t, ips, 1)
217+
assert.Equal(t, "1.2.3.4", ips[0].String())
218+
}
219+
220+
func TestParseDNSResponse_DropsUnreachableCNAMEA(t *testing.T) {
221+
t.Parallel()
222+
223+
// An A record whose owner is NOT reachable via any CNAME chain from
224+
// the question must be dropped even if another A record from the same
225+
// name-chain is valid.
226+
msg := &dns.Msg{
227+
MsgHdr: dns.MsgHdr{Id: 0x3456, Response: true},
228+
Question: []dns.Question{
229+
{Name: "example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET},
230+
},
231+
Answer: []dns.RR{
232+
&dns.CNAME{
233+
Hdr: dns.RR_Header{Name: "example.com.", Rrtype: dns.TypeCNAME, Class: dns.ClassINET, Ttl: 300},
234+
Target: "cdn.example.net.",
235+
},
236+
// A record for an unrelated name slipped into the Answer section.
237+
&dns.A{
238+
Hdr: dns.RR_Header{Name: "attacker.example.net.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 300},
239+
A: net.ParseIP("10.0.0.1"),
240+
},
241+
// Legitimate A record at the CNAME target.
242+
&dns.A{
243+
Hdr: dns.RR_Header{Name: "cdn.example.net.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 300},
244+
A: net.ParseIP("1.2.3.4"),
245+
},
246+
},
247+
}
248+
payload, err := msg.Pack()
249+
require.NoError(t, err)
250+
251+
_, ips, _, err := ParseDNSResponse(payload)
252+
require.NoError(t, err)
253+
require.Len(t, ips, 1)
254+
assert.Equal(t, "1.2.3.4", ips[0].String())
255+
}
256+
131257
func TestParseDNSResponse_NoARecords(t *testing.T) {
132258
t.Parallel()
133259

net/egress/interceptor.go

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ const (
1616
// if the DNS response has a shorter TTL. This prevents excessive
1717
// rule churn from very short TTLs.
1818
defaultMinTTL = 60 * time.Second
19+
20+
// defaultMaxTTL is the maximum TTL applied to dynamic rules, even
21+
// if the DNS response advertises a longer TTL. This bounds how long
22+
// a resolved IP remains allowed — useful if an upstream server
23+
// returns very long TTLs, and as belt-and-suspenders against a
24+
// compromised zone returning long-lived rogue answers.
25+
defaultMaxTTL = 5 * time.Minute
1926
)
2027

2128
// DNSInterceptor intercepts DNS traffic at the relay level to enforce
@@ -25,21 +32,47 @@ type DNSInterceptor struct {
2532
policy *Policy
2633
dynamicRules *firewall.DynamicRules
2734
minTTL time.Duration
35+
maxTTL time.Duration
2836
gatewayIP [4]byte
2937
}
3038

39+
// DNSInterceptorOption customizes a DNSInterceptor.
40+
type DNSInterceptorOption func(*DNSInterceptor)
41+
42+
// WithMinTTL sets the minimum TTL applied to dynamic rules. A zero or
43+
// negative value leaves the default in place.
44+
func WithMinTTL(d time.Duration) DNSInterceptorOption {
45+
return func(i *DNSInterceptor) {
46+
if d > 0 {
47+
i.minTTL = d
48+
}
49+
}
50+
}
51+
52+
// WithMaxTTL sets the maximum TTL applied to dynamic rules. A zero or
53+
// negative value disables the cap (any TTL is accepted).
54+
func WithMaxTTL(d time.Duration) DNSInterceptorOption {
55+
return func(i *DNSInterceptor) { i.maxTTL = d }
56+
}
57+
3158
// NewDNSInterceptor creates an interceptor with the given policy, dynamic
3259
// rule set, and gateway IP. Only DNS responses from the gateway are
3360
// snooped to prevent spoofed responses from creating dynamic rules.
34-
// Dynamic rules created from DNS responses will have at least minTTL
35-
// duration (use 0 for the default of 60 seconds).
36-
func NewDNSInterceptor(policy *Policy, dr *firewall.DynamicRules, gatewayIP [4]byte) *DNSInterceptor {
37-
return &DNSInterceptor{
61+
//
62+
// By default, dynamic rules are clamped to minTTL=60s and maxTTL=5m;
63+
// override via WithMinTTL / WithMaxTTL.
64+
func NewDNSInterceptor(policy *Policy, dr *firewall.DynamicRules, gatewayIP [4]byte, opts ...DNSInterceptorOption) *DNSInterceptor {
65+
i := &DNSInterceptor{
3866
policy: policy,
3967
dynamicRules: dr,
4068
minTTL: defaultMinTTL,
69+
maxTTL: defaultMaxTTL,
4170
gatewayIP: gatewayIP,
4271
}
72+
for _, o := range opts {
73+
o(i)
74+
}
75+
return i
4376
}
4477

4578
// HandleEgress processes an outbound DNS query frame. If the queried
@@ -118,6 +151,9 @@ func (d *DNSInterceptor) HandleIngress(frame []byte, hdr *firewall.PacketHeader)
118151
if ttl < d.minTTL {
119152
ttl = d.minTTL
120153
}
154+
if d.maxTTL > 0 && ttl > d.maxTTL {
155+
ttl = d.maxTTL
156+
}
121157

122158
ports, proto := d.policy.HostPorts(qname)
123159

net/egress/interceptor_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"encoding/binary"
88
"net"
99
"testing"
10+
"time"
1011

1112
mdns "github.com/miekg/dns"
1213
"github.com/stretchr/testify/assert"
@@ -304,6 +305,37 @@ func TestDNSInterceptor_ResponseFromNonGateway_Ignored(t *testing.T) {
304305
"DNS responses from non-gateway sources must not create dynamic rules")
305306
}
306307

308+
func TestDNSInterceptor_ClampsTTLAtMaximum(t *testing.T) {
309+
t.Parallel()
310+
311+
policy := NewPolicy([]HostSpec{{Name: "example.com"}})
312+
dr := firewall.NewDynamicRules()
313+
interceptor := NewDNSInterceptor(policy, dr, testDstIP,
314+
WithMinTTL(1*time.Microsecond),
315+
WithMaxTTL(5*time.Millisecond),
316+
)
317+
318+
// Response advertises TTL = 1 hour; should be clamped to 5 ms.
319+
ips := []net.IP{net.ParseIP("1.2.3.4")}
320+
frame := buildDNSResponseFrame(testDstMAC, testSrcMAC, testDstIP, testSrcIP, 12345, "example.com", ips, 3600)
321+
hdr := firewall.ParseHeaders(frame)
322+
323+
interceptor.HandleIngress(frame, hdr)
324+
325+
probe := &firewall.PacketHeader{
326+
DstIP: [4]byte{1, 2, 3, 4},
327+
Protocol: 6,
328+
DstPort: 443,
329+
}
330+
_, ok := dr.Match(firewall.Egress, probe)
331+
require.True(t, ok, "rule should be live immediately after ingress")
332+
333+
// Give the clamp time to elapse; the rule must no longer match.
334+
time.Sleep(25 * time.Millisecond)
335+
_, ok = dr.Match(firewall.Egress, probe)
336+
assert.False(t, ok, "rule should have expired past maxTTL clamp")
337+
}
338+
307339
func TestDNSInterceptor_ExplicitProtocol_SingleRule(t *testing.T) {
308340
t.Parallel()
309341

0 commit comments

Comments
 (0)