Skip to content

Commit 026416e

Browse files
committed
fix: filter stale addresses from provider and peer responses
some third-party DHT peers have a bug where they never clean up old addresses they observed for other peers. if a peer behind a consumer NAT gets a new port via UPnP, or the ISP rotates its IP, the old address stays in the DHT forever. over time a peer can end up with dozens of dead addresses in front of the one that actually works, so clients time out before they ever reach it. this mostly hurts direct retrieval from self-hosted peers on consumer networks with dynamic IPs or ports (UPnP, NAT-PMP, and similar). this adds two layers of cleanup: passive filtering runs inline on every response. when someguy has previously connected to a peer, it remembers which address worked and drops other addresses on the same IP and transport that have a different (stale) port. active probing (SOMEGUY_CACHED_ADDR_BOOK_STALE_PROBING, on by default) kicks in for first-encounter peers whose address set looks suspicious: multiple ports on the same IP, or more than three IPs in one address family. each unique address is probed with a quick libp2p handshake in the background. records that don't need probing stream through right away; probed results show up at the end. if every probe fails the peer is probably offline and all addresses are returned as-is (fail-open). also fixes a race in cached_addr_book where concurrent requests could clobber each other's peerCache updates (Peek + Add without a lock).
1 parent 72b6733 commit 026416e

12 files changed

Lines changed: 1292 additions & 22 deletions

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ The following emojis are used to highlight certain changes:
2121

2222
### Fixed
2323

24+
- ✨ Some faulty third-party DHT peers never expire old observed addresses. Peers with dynamic ports (e.g. UPnP on consumer routers) or changing IPs (roaming, ISP changes) accumulate dead addresses over time. A provider record with 60 stale addresses before the one that works makes the peer effectively unreachable, degrading routing results for everyone downstream of someguy. This release adds two layers of stale address filtering:
25+
- **Passive filtering** (fast, inline): after a successful connection, someguy remembers the working address and strips addresses on the same IP and transport that have a different (outdated) port.
26+
- **Active probing** (async, non-blocking, controlled by `SOMEGUY_CACHED_ADDR_BOOK_STALE_PROBING`): on first encounter, when a peer's address set looks suspicious (multiple ports per IP, or more than 3 IPs per address family), each unique address is probed in the background with an ephemeral libp2p handshake. Records that don't need probing stream through immediately; probed results appear at the end of the response once the handshakes complete. If every probe fails (peer offline), all addresses are returned unchanged (fail-open).
27+
2428
### Security
2529

2630
## [v0.11.1]

addr_filter.go

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
// addr_filter.go provides passive stale-address filtering and detection
2+
// heuristics for the active probing layer (see addr_prober.go).
3+
//
4+
// Problem: some DHT server implementations never expire old observed
5+
// addresses for a peer. Peers with dynamic ports (e.g. UPnP on consumer
6+
// routers) or changing IPs (roaming, ISP changes) accumulate dead addresses
7+
// over time. A provider record with 60 dead port addresses before the one
8+
// that works makes the peer effectively unreachable.
9+
//
10+
// Passive filtering (filterStalePortAddrs): when someguy has previously
11+
// connected to a peer, it remembers the working address. On subsequent
12+
// lookups, addresses on the same IP and layer-4 protocol but with a
13+
// different port are stripped out. This is fast and runs inline.
14+
//
15+
// Detection (needsProbing): when no known-good address exists (first
16+
// encounter), this heuristic checks whether the address set looks
17+
// suspicious -- multiple ports on the same (IP, L4), or more than three IPs
18+
// within the same address family. If so, the record is handed to the
19+
// async probing layer (probeFilterIter in server_routers.go).
20+
package main
21+
22+
import (
23+
"strconv"
24+
25+
"github.com/ipfs/boxo/routing/http/types"
26+
ma "github.com/multiformats/go-multiaddr"
27+
"github.com/prometheus/client_golang/prometheus"
28+
"github.com/prometheus/client_golang/prometheus/promauto"
29+
)
30+
31+
var staleAddrsFilteredCounter = promauto.NewCounter(prometheus.CounterOpts{
32+
Name: "stale_addrs_filtered",
33+
Namespace: name,
34+
Subsystem: "addr_filter",
35+
Help: "Number of stale addresses filtered from responses (same IP, different port from last known-good connection)",
36+
})
37+
38+
// addrTransportKey identifies an (IP address, layer-4 protocol) pair.
//
// Two multiaddrs that map to the same key and differ only in port are
// candidates for stale address filtering. The type is comparable, so it can
// be compared with == and used as a map key.
type addrTransportKey struct {
	// ip is the textual IP address, e.g. "209.222.4.177" or "2001:db8::1".
	ip string
	// l4Code is the multiaddr protocol code of the transport:
	// ma.P_TCP or ma.P_UDP.
	l4Code int
}
45+
46+
// extractAddrTransportKey returns the IP, layer-4 protocol, and port from a
47+
// multiaddr. Returns false for relay (circuit), HTTP, and DNS addresses, or
48+
// multiaddrs without a standard IP + transport structure.
49+
func extractAddrTransportKey(addr ma.Multiaddr) (key addrTransportKey, port int, ok bool) {
50+
// skip relay addresses: the IP/port belongs to the relay, not the peer
51+
if _, err := addr.ValueForProtocol(ma.P_CIRCUIT); err == nil {
52+
return addrTransportKey{}, 0, false
53+
}
54+
55+
// skip HTTP addresses: trustless gateway, not a libp2p peer
56+
if _, err := addr.ValueForProtocol(ma.P_HTTP); err == nil {
57+
return addrTransportKey{}, 0, false
58+
}
59+
60+
if v, err := addr.ValueForProtocol(ma.P_IP4); err == nil {
61+
key.ip = v
62+
} else if v, err := addr.ValueForProtocol(ma.P_IP6); err == nil {
63+
key.ip = v
64+
} else {
65+
return addrTransportKey{}, 0, false
66+
}
67+
68+
if v, err := addr.ValueForProtocol(ma.P_TCP); err == nil {
69+
key.l4Code = ma.P_TCP
70+
port, _ = strconv.Atoi(v)
71+
ok = true
72+
} else if v, err := addr.ValueForProtocol(ma.P_UDP); err == nil {
73+
key.l4Code = ma.P_UDP
74+
port, _ = strconv.Atoi(v)
75+
ok = true
76+
}
77+
return
78+
}
79+
80+
// filterStalePortAddrs removes multiaddrs that share the same (IP, layer-4
81+
// protocol) as connectedAddr but have a different port. These are likely
82+
// stale port forwards from old NAT mappings.
83+
//
84+
// Addrs on different IPs, different L4 protocols, or unparseable addrs
85+
// are kept unchanged.
86+
func filterStalePortAddrs(addrs []types.Multiaddr, connectedAddr ma.Multiaddr) []types.Multiaddr {
87+
if connectedAddr == nil || len(addrs) == 0 {
88+
return addrs
89+
}
90+
91+
goodKey, goodPort, ok := extractAddrTransportKey(connectedAddr)
92+
if !ok {
93+
return addrs
94+
}
95+
96+
result := make([]types.Multiaddr, 0, len(addrs))
97+
var filtered int
98+
99+
for _, addr := range addrs {
100+
key, port, ok := extractAddrTransportKey(addr.Multiaddr)
101+
if !ok || key != goodKey {
102+
result = append(result, addr)
103+
continue
104+
}
105+
if port == goodPort {
106+
result = append(result, addr)
107+
} else {
108+
filtered++
109+
}
110+
}
111+
112+
if filtered > 0 {
113+
staleAddrsFilteredCounter.Add(float64(filtered))
114+
}
115+
return result
116+
}
117+
118+
// needsProbing returns true when the addr set shows signs of stale addresses:
119+
// - multi-port: any (IP, L4) group has more than one distinct port
120+
// - multi-IP: any address family (v4 or v6) has more than one distinct IP
121+
func needsProbing(addrs []types.Multiaddr) bool {
122+
type ipL4 struct {
123+
ip string
124+
l4Code int
125+
}
126+
127+
ports := make(map[ipL4]map[int]struct{})
128+
v4IPs := make(map[string]struct{})
129+
v6IPs := make(map[string]struct{})
130+
131+
for _, addr := range addrs {
132+
key, port, ok := extractAddrTransportKey(addr.Multiaddr)
133+
if !ok {
134+
continue
135+
}
136+
137+
k := ipL4{ip: key.ip, l4Code: key.l4Code}
138+
if ports[k] == nil {
139+
ports[k] = make(map[int]struct{})
140+
}
141+
ports[k][port] = struct{}{}
142+
143+
// track distinct IPs per address family
144+
if _, err := addr.Multiaddr.ValueForProtocol(ma.P_IP4); err == nil {
145+
v4IPs[key.ip] = struct{}{}
146+
} else if _, err := addr.Multiaddr.ValueForProtocol(ma.P_IP6); err == nil {
147+
v6IPs[key.ip] = struct{}{}
148+
}
149+
}
150+
151+
// multi-port: any (IP, L4) has >1 port
152+
for _, ps := range ports {
153+
if len(ps) > 1 {
154+
return true
155+
}
156+
}
157+
158+
// multi-IP: same address family has many distinct IPs.
159+
// 2-3 IPs is normal (dual WAN, cloud instances with public + VPC),
160+
// but 4+ within a single family suggests stale addrs from ISP/roaming changes not being expired by some poorly written third-party DHT peers.
161+
if len(v4IPs) > 3 || len(v6IPs) > 3 {
162+
return true
163+
}
164+
165+
return false
166+
}
167+
168+
// findStalePortAddrs returns multiaddrs from allAddrs that share the same
169+
// (IP, layer-4 protocol) as connectedAddr but have a different port.
170+
// Used for cleaning up stale entries from the addr book cache.
171+
func findStalePortAddrs(allAddrs []ma.Multiaddr, connectedAddr ma.Multiaddr) []ma.Multiaddr {
172+
if connectedAddr == nil || len(allAddrs) == 0 {
173+
return nil
174+
}
175+
176+
goodKey, goodPort, ok := extractAddrTransportKey(connectedAddr)
177+
if !ok {
178+
return nil
179+
}
180+
181+
var stale []ma.Multiaddr
182+
for _, addr := range allAddrs {
183+
key, port, ok := extractAddrTransportKey(addr)
184+
if !ok || key != goodKey {
185+
continue
186+
}
187+
if port != goodPort {
188+
stale = append(stale, addr)
189+
}
190+
}
191+
return stale
192+
}

0 commit comments

Comments
 (0)