Skip to content

Commit b25fc58

Browse files
authored
randomize machine and pid bytes in client correlation id so OAST callbacks stop leaking a per-host fingerprint (#1378)
1 parent 4a85ec6 commit b25fc58

2 files changed

Lines changed: 101 additions & 2 deletions

File tree

pkg/client/client.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,17 @@ func New(options *Options) (*Client, error) {
150150
secretKey = options.SessionInfo.SecretKey
151151
token = options.SessionInfo.Token
152152
} else {
153-
// Generate a random ksuid which will be used as server secret.
154-
correlationID = xid.New().String()
153+
// Use a privacy-preserving xid: keep the 4-byte timestamp prefix so the id
154+
// stays k-ordered and parseable, but randomize the trailing 8 bytes
155+
// (machine + pid + counter). Plain xid.New() leaks a stable per-machine
156+
// fingerprint (md5(hostname)[:3]) through every OAST callback, which is
157+
// observable by the target service and by third-party telemetry that
158+
// logs OAST traffic (see issue #1349).
159+
anonID, err := newAnonymousCorrelationID()
160+
if err != nil {
161+
return nil, errkit.Wrap(err, "could not generate correlation id")
162+
}
163+
correlationID = anonID
155164
if len(correlationID) > options.CorrelationIdLength {
156165
correlationID = correlationID[:options.CorrelationIdLength]
157166
}
@@ -721,3 +730,18 @@ func (c *Client) SaveSessionTo(filename string) error {
721730
}
722731
return os.WriteFile(filename, data, os.ModePerm)
723732
}
733+
734+
// newAnonymousCorrelationID returns an xid-formatted correlation id that
735+
// preserves the 4-byte timestamp prefix (for k-ordering and the server-side
736+
// xidAlphabet validator) and replaces the remaining 8 bytes (machine, pid,
737+
// counter) with crypto/rand. The result is still a valid xid string, still
738+
// 20 chars from the [0-9a-v] alphabet, and still parseable with xid.FromString.
739+
func newAnonymousCorrelationID() (string, error) {
740+
id := xid.NewWithTime(time.Now())
741+
// Overwrite machine (4..6), pid (7..8), counter (9..11) with random bytes.
742+
// id is xid.ID ([12]byte) and id[4:] aliases the same backing array.
743+
if _, err := rand.Read(id[4:]); err != nil {
744+
return "", err
745+
}
746+
return id.String(), nil
747+
}

pkg/client/correlation_id_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package client
2+
3+
import (
4+
"regexp"
5+
"testing"
6+
"time"
7+
8+
"github.com/rs/xid"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
// xidAlphabet matches the server-side validator (see pkg/server/util.go).
13+
var xidAlphabet = regexp.MustCompile(`^[0-9a-v]{20}$`)
14+
15+
func TestAnonymousCorrelationIDFormat(t *testing.T) {
16+
id, err := newAnonymousCorrelationID()
17+
require.NoError(t, err)
18+
require.Len(t, id, 20)
19+
require.Regexp(t, xidAlphabet, id, "must match server xid alphabet validator")
20+
21+
parsed, err := xid.FromString(id)
22+
require.NoError(t, err, "must remain parseable as xid")
23+
require.WithinDuration(t, time.Now(), parsed.Time(), 5*time.Second,
24+
"timestamp prefix must encode the current time")
25+
}
26+
27+
// TestAnonymousCorrelationIDNotFingerprinted asserts that the machine and pid
28+
// bytes are randomised: xid.New() returns the same machine+pid for every call
29+
// in the same process; newAnonymousCorrelationID() must not.
30+
func TestAnonymousCorrelationIDNotFingerprinted(t *testing.T) {
31+
const samples = 64
32+
33+
machineSet := make(map[string]struct{}, samples)
34+
pidSet := make(map[uint16]struct{}, samples)
35+
36+
for range samples {
37+
raw, err := newAnonymousCorrelationID()
38+
require.NoError(t, err)
39+
id, err := xid.FromString(raw)
40+
require.NoError(t, err)
41+
machineSet[string(id.Machine())] = struct{}{}
42+
pidSet[id.Pid()] = struct{}{}
43+
}
44+
45+
// With 64 samples and full randomness, the probability of getting a
46+
// single repeated 3-byte machine value is ~64^2 / 2^25 ≈ 1.2e-4. A
47+
// fingerprinted id would collapse to a single value, so even a weak
48+
// lower bound here catches the regression.
49+
require.Greater(t, len(machineSet), samples/2,
50+
"machine bytes must be randomised across calls")
51+
require.Greater(t, len(pidSet), samples/2,
52+
"pid bytes must be randomised across calls")
53+
}
54+
55+
func TestAnonymousCorrelationIDDistinctFromXidNew(t *testing.T) {
56+
// xid.New() inherits a stable machine fingerprint per process; the
57+
// anonymous helper must not.
58+
stable := xid.New().Machine()
59+
collisions := 0
60+
const samples = 32
61+
for range samples {
62+
raw, err := newAnonymousCorrelationID()
63+
require.NoError(t, err)
64+
id, err := xid.FromString(raw)
65+
require.NoError(t, err)
66+
if string(id.Machine()) == string(stable) {
67+
collisions++
68+
}
69+
}
70+
// A truly fingerprinted helper would collide on every sample. Allow a
71+
// generous threshold so the test stays stable against random chance
72+
// (expected collisions ≈ samples / 2^24, i.e. effectively zero).
73+
require.Less(t, collisions, samples/4,
74+
"anonymous helper must not reuse xid.New()'s machine fingerprint")
75+
}

0 commit comments

Comments
 (0)