Skip to content

Commit 1826114

Browse files
authored
fix(bump): validate subtree count varint to bound parser allocations (#66) (#129)
* fix(bump): validate subtree count varint to bound parser allocations (#66) The block-binary parser fed two untrusted varints — subtreeCount and cbBUMPLen — directly into make() without validating them. PR#107 capped the body at 1 GiB but the in-band counts could still claim ~2^64 elements, forcing a multi-petabyte preallocation before the LimitReader ever ran out of bytes. Reject implausible subtree counts (>10M, the documented Teranode-class headroom) and counts that cannot physically fit in the remaining body. Apply the same body-capacity sanity check to cbBUMPLen, falling back to the existing "no coinbase BUMP" path so the subtree hashes are still returned. Adds tests covering huge varints, the body-capacity boundary, and the cbBUMPLen sibling path. Closes #66 * fix(bump): also bound txCount and sizeBytes varints The /block binary response prefixes the payload with two metadata varints (txCount, sizeBytes) that parseBlockBinary currently reads only to advance the cursor. They are not used for allocation today, so they are not part of the F-008 exploit surface — but a future refactor could begin to consume them, and the cost of bounding them now is one branch per parse. Cap txCount at 1e9 (well above any plausible Teranode block) and sizeBytes at 1 TiB (a logical block-size limit, not a response-body limit — the body itself is enforced separately by DefaultMaxBlockBytes). Adds two tests asserting that oversized varints are rejected. Defense-in-depth follow-up to #66 / F-008. * fix(bump): silence gosec G115 on bytes.Reader.Len() conversions bytes.Reader.Len() is documented to return the number of unread bytes, which is always non-negative. gosec G115 flags the int -> uint64 cast defensively; add inline //nolint annotations with rationale, matching the project's existing nolint:gosec style in datahub_test.go.
1 parent 40ac42e commit 1826114

2 files changed

Lines changed: 237 additions & 0 deletions

File tree

bump/datahub.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,33 @@ const DefaultMaxBlockBytes int64 = 1 * 1024 * 1024 * 1024 // 1 GiB
3232
// stops a hostile server from inflating arcade's log lines.
3333
const maxErrorBodyBytes int64 = 512
3434

35+
// maxSubtreeCount caps the number of subtree hashes parseBlockBinary will
36+
// preallocate before reading them from the response. PR#107 / F-007 caps the
37+
// total response body at DefaultMaxBlockBytes (1 GiB), and each subtree hash
38+
// is exactly 32 bytes on the wire, so the absolute upper bound implied by the
39+
// body cap is DefaultMaxBlockBytes/32 ≈ 33.5 million entries. We pick a
40+
// round, well-under-the-ceiling cap of 10 million, which is comfortably above
41+
// any plausible Teranode block (millions of subtrees) while preventing a
42+
// hostile DataHub from pushing a varint of, say, 2^60 and forcing a
43+
// multi-petabyte preallocation. See finding F-008.
44+
const maxSubtreeCount uint64 = 10_000_000
45+
46+
// maxTxCount caps the txCount metadata varint that prefixes the binary block
47+
// payload. The /block endpoint records the block's total transaction count;
48+
// even Teranode-class blocks stay well under a billion, so 1e9 is comfortable
49+
// headroom and rejects obviously-bogus 2^60-style varints. The value is
50+
// currently only consumed to advance the reader cursor, but bounding it now
51+
// prevents a future refactor from introducing an allocation path on
52+
// untrusted input and surfaces malformed responses earlier. See F-008.
53+
const maxTxCount uint64 = 1_000_000_000
54+
55+
// maxBlockSizeBytes caps the sizeBytes metadata varint. The /block endpoint
56+
// records the total on-chain block size including subtree files served
57+
// separately, so this is a logical block-size limit (1 TiB) rather than a
58+
// response-body limit — the response body itself is enforced by
59+
// DefaultMaxBlockBytes. Defense-in-depth rationale matches maxTxCount.
60+
const maxBlockSizeBytes uint64 = 1 << 40
61+
3562
// BlockDataValidator is an optional response-acceptance predicate run after a
3663
// successful datahub fetch. Returning a non-nil error causes the fetch loop
3764
// to discard that peer's response (logging the reason into urlErrors) and try
@@ -271,19 +298,38 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e
271298
if _, rErr := txCount.ReadFrom(r); rErr != nil {
272299
return nil, nil, nil, fmt.Errorf("failed to read transaction count: %w", rErr)
273300
}
301+
if uint64(txCount) > maxTxCount {
302+
return nil, nil, nil, fmt.Errorf("tx count %d exceeds maximum of %d", uint64(txCount), maxTxCount)
303+
}
274304

275305
// Read size in bytes (varint)
276306
var sizeBytes util.VarInt
277307
if _, rErr := sizeBytes.ReadFrom(r); rErr != nil {
278308
return nil, nil, nil, fmt.Errorf("failed to read size in bytes: %w", rErr)
279309
}
310+
if uint64(sizeBytes) > maxBlockSizeBytes {
311+
return nil, nil, nil, fmt.Errorf("block size %d exceeds maximum of %d", uint64(sizeBytes), maxBlockSizeBytes)
312+
}
280313

281314
// Read subtree count (varint)
282315
var subtreeCount util.VarInt
283316
if _, rErr := subtreeCount.ReadFrom(r); rErr != nil {
284317
return nil, nil, nil, fmt.Errorf("failed to read subtree count: %w", rErr)
285318
}
286319

320+
// Reject implausible subtree counts before allocating. A hostile or
321+
// buggy DataHub could otherwise send a 9-byte varint encoding ~2^64-1
322+
// and force a multi-petabyte preallocation. A count that cannot
323+
// physically fit in the remaining body (32 bytes per hash) is also
324+
// rejected so we never allocate space we are guaranteed never to fill.
325+
// See finding F-008.
326+
if uint64(subtreeCount) > maxSubtreeCount {
327+
return nil, nil, nil, fmt.Errorf("subtree count %d exceeds maximum of %d", uint64(subtreeCount), maxSubtreeCount)
328+
}
329+
if uint64(subtreeCount) > uint64(r.Len()/32) { //nolint:gosec // bytes.Reader.Len() is always non-negative (remaining unread bytes)
330+
return nil, nil, nil, fmt.Errorf("subtree count %d exceeds remaining body capacity (%d bytes for %d-byte hashes)", uint64(subtreeCount), r.Len(), 32)
331+
}
332+
287333
// Read subtree hashes (32 bytes each)
288334
hashes := make([]chainhash.Hash, 0, uint64(subtreeCount))
289335
hashBuf := make([]byte, 32)
@@ -324,6 +370,18 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e
324370
return hashes, nil, headerMerkleRoot, nil
325371
}
326372

373+
// Reject coinbase BUMP lengths that cannot physically fit in the
374+
// remaining response body before allocating. The body itself was
375+
// already capped by FetchBlockDataForBUMPWithCap, but the in-band
376+
// varint is still untrusted, so a 2^64-sized cbBUMPLen would force an
377+
// enormous preallocation here without this check. We treat oversize
378+
// lengths as "no coinbase BUMP available" — matching the existing
379+
// best-effort posture of the coinbase-tail parsing below the subtree
380+
// hashes. See finding F-008.
381+
if uint64(cbBUMPLen) > uint64(r.Len()) { //nolint:gosec // bytes.Reader.Len() is always non-negative (remaining unread bytes)
382+
return hashes, nil, headerMerkleRoot, nil
383+
}
384+
327385
// Read coinbase BUMP data
328386
coinbaseBUMP := make([]byte, uint64(cbBUMPLen))
329387
if _, err := io.ReadFull(r, coinbaseBUMP); err != nil {

bump/datahub_test.go

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package bump
22

33
import (
4+
"bytes"
45
"context"
6+
"encoding/binary"
57
"net/http"
68
"net/http/httptest"
79
"strconv"
@@ -346,3 +348,180 @@ func TestFetchBlockDataForBUMP_ZeroCapUsesDefault(t *testing.T) {
346348
t.Fatalf("negative cap should select the default and succeed, got: %v", err)
347349
}
348350
}
351+
352+
// --- Untrusted-varint allocation tests (F-008) ---------------------------
353+
354+
// blockBytesWithSubtreeCountVarint builds a binary block payload with the
355+
// supplied subtreeCount written verbatim as a 9-byte 0xFF varint. The body
356+
// itself is short, so a parser that allocates based on the varint will trip
357+
// long before it tries to fill the buffer.
358+
func blockBytesWithSubtreeCountVarint(t *testing.T, subtreeCount uint64) []byte {
359+
t.Helper()
360+
var buf bytes.Buffer
361+
buf.Write(make([]byte, 80)) // header (zeroed; merkle-root only needs 32 bytes)
362+
buf.WriteByte(0x00) // txCount = 0
363+
buf.WriteByte(0x00) // sizeBytes = 0
364+
// subtreeCount as 0xFF + uint64 LE — the largest VarInt encoding form,
365+
// so we can dial in any uint64 value the test wants.
366+
buf.WriteByte(0xff)
367+
var le [8]byte
368+
binary.LittleEndian.PutUint64(le[:], subtreeCount)
369+
buf.Write(le[:])
370+
return buf.Bytes()
371+
}
372+
373+
// TestFetchBlockDataForBUMP_RejectsHugeSubtreeCount verifies that a varint
374+
// claiming far more subtrees than maxSubtreeCount is rejected without
375+
// attempting a giant preallocation. We run the test through the public
376+
// fetcher to also exercise the body-cap and HTTP plumbing.
377+
func TestFetchBlockDataForBUMP_RejectsHugeSubtreeCount(t *testing.T) {
378+
body := blockBytesWithSubtreeCountVarint(t, 1<<60) // ~1.15 quintillion
379+
380+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
381+
w.WriteHeader(http.StatusOK)
382+
_, _ = w.Write(body)
383+
}))
384+
t.Cleanup(srv.Close)
385+
386+
_, _, _, err := FetchBlockDataForBUMP(
387+
context.Background(),
388+
[]string{srv.URL},
389+
"deadbeef",
390+
zap.NewNop(),
391+
)
392+
if err == nil {
393+
t.Fatal("expected error for oversized subtree count varint")
394+
}
395+
if !strings.Contains(err.Error(), "subtree count") {
396+
t.Errorf("expected error mentioning subtree count, got: %v", err)
397+
}
398+
}
399+
400+
// TestParseBlockBinary_RejectsSubtreeCountAboveBodyCapacity verifies that a
401+
// subtreeCount which is below maxSubtreeCount but still cannot fit in the
402+
// remaining body bytes is rejected before the make() call. This catches
403+
// "plausible-but-impossible" counts that would otherwise allocate hundreds
404+
// of MiB for a body only kilobytes long.
405+
func TestParseBlockBinary_RejectsSubtreeCountAboveBodyCapacity(t *testing.T) {
406+
// 1,000,000 < maxSubtreeCount (10,000,000) so the absolute cap passes,
407+
// but the body has zero bytes after the varint, so the body-capacity
408+
// check must reject.
409+
body := blockBytesWithSubtreeCountVarint(t, 1_000_000)
410+
_, _, _, err := parseBlockBinary(body)
411+
if err == nil {
412+
t.Fatal("expected error for subtree count exceeding body capacity")
413+
}
414+
if !strings.Contains(err.Error(), "remaining body capacity") {
415+
t.Errorf("expected body-capacity error, got: %v", err)
416+
}
417+
}
418+
419+
// TestParseBlockBinary_AcceptsZeroSubtreeCount keeps the boundary case
420+
// covered: an empty subtree list must continue to parse successfully and
421+
// return zero hashes.
422+
func TestParseBlockBinary_AcceptsZeroSubtreeCount(t *testing.T) {
423+
body := minimalValidBlockBytes(t)
424+
hashes, _, root, err := parseBlockBinary(body)
425+
if err != nil {
426+
t.Fatalf("expected success for zero subtree count, got: %v", err)
427+
}
428+
if len(hashes) != 0 {
429+
t.Errorf("expected 0 hashes, got %d", len(hashes))
430+
}
431+
if root == nil {
432+
t.Errorf("expected non-nil header merkle root")
433+
}
434+
}
435+
436+
// TestParseBlockBinary_RejectsCoinbaseBUMPLengthAboveBodyCapacity covers the
437+
// sibling unbounded allocation: cbBUMPLen is a varint and was previously
438+
// fed straight into make([]byte, ...). We construct a payload with a valid
439+
// header + zero subtree hashes + minimal coinbase tx, then a bogus 2^60
440+
// cbBUMPLen, and confirm the parser short-circuits to "no coinbase BUMP"
441+
// instead of allocating an exabyte of memory.
442+
func TestParseBlockBinary_RejectsCoinbaseBUMPLengthAboveBodyCapacity(t *testing.T) {
443+
var buf bytes.Buffer
444+
buf.Write(make([]byte, 80)) // header
445+
buf.WriteByte(0x00) // txCount = 0
446+
buf.WriteByte(0x00) // sizeBytes = 0
447+
buf.WriteByte(0x00) // subtreeCount = 0
448+
// Minimal-ish coinbase tx: version (4) | inCount=1 | prev hash (32) |
449+
// prev index (4) | scriptLen=0 | sequence (4) | outCount=0 | locktime (4).
450+
// The bsv-sdk tx parser is happy with this skeleton even though it would
451+
// be rejected by consensus — we only need txBytesUsed to advance.
452+
tx := make([]byte, 0, 64)
453+
tx = append(tx, 0x01, 0x00, 0x00, 0x00) // version
454+
tx = append(tx, 0x01) // inCount = 1
455+
tx = append(tx, make([]byte, 32)...) // prev hash
456+
tx = append(tx, 0xff, 0xff, 0xff, 0xff) // prev index
457+
tx = append(tx, 0x00) // scriptLen = 0
458+
tx = append(tx, 0xff, 0xff, 0xff, 0xff) // sequence
459+
tx = append(tx, 0x00) // outCount = 0
460+
tx = append(tx, 0x00, 0x00, 0x00, 0x00) // locktime
461+
buf.Write(tx)
462+
buf.WriteByte(0x00) // blockHeight varint = 0
463+
// cbBUMPLen as a 0xFF varint with a wildly oversized value.
464+
buf.WriteByte(0xff)
465+
var le [8]byte
466+
binary.LittleEndian.PutUint64(le[:], 1<<60)
467+
buf.Write(le[:])
468+
469+
hashes, cb, root, err := parseBlockBinary(buf.Bytes())
470+
if err != nil {
471+
t.Fatalf("parser should not error on bogus cbBUMPLen, got: %v", err)
472+
}
473+
if cb != nil {
474+
t.Errorf("expected nil coinbase BUMP for oversize cbBUMPLen, got %d bytes", len(cb))
475+
}
476+
if len(hashes) != 0 {
477+
t.Errorf("expected 0 hashes, got %d", len(hashes))
478+
}
479+
if root == nil {
480+
t.Errorf("expected non-nil header merkle root")
481+
}
482+
}
483+
484+
// TestParseBlockBinary_RejectsHugeTxCount verifies that a txCount varint
485+
// claiming far more transactions than maxTxCount is rejected before the
486+
// parser advances. txCount is not currently used for allocation, but bounding
487+
// it is defense-in-depth against future refactors that begin to use the
488+
// value and surfaces bogus payloads earlier.
489+
func TestParseBlockBinary_RejectsHugeTxCount(t *testing.T) {
490+
var buf bytes.Buffer
491+
buf.Write(make([]byte, 80)) // header (zeroed)
492+
// txCount as 0xFF + uint64 LE — the largest VarInt encoding form.
493+
buf.WriteByte(0xff)
494+
var le [8]byte
495+
binary.LittleEndian.PutUint64(le[:], 1<<60)
496+
buf.Write(le[:])
497+
498+
_, _, _, err := parseBlockBinary(buf.Bytes())
499+
if err == nil {
500+
t.Fatal("expected error for oversized tx count varint")
501+
}
502+
if !strings.Contains(err.Error(), "tx count") {
503+
t.Errorf("expected error mentioning tx count, got: %v", err)
504+
}
505+
}
506+
507+
// TestParseBlockBinary_RejectsHugeSizeBytes verifies that a sizeBytes varint
508+
// claiming a block size beyond maxBlockSizeBytes is rejected. Same rationale
509+
// as TestParseBlockBinary_RejectsHugeTxCount — defense-in-depth.
510+
func TestParseBlockBinary_RejectsHugeSizeBytes(t *testing.T) {
511+
var buf bytes.Buffer
512+
buf.Write(make([]byte, 80)) // header (zeroed)
513+
buf.WriteByte(0x00) // txCount = 0
514+
// sizeBytes as 0xFF + uint64 LE — the largest VarInt encoding form.
515+
buf.WriteByte(0xff)
516+
var le [8]byte
517+
binary.LittleEndian.PutUint64(le[:], 1<<60)
518+
buf.Write(le[:])
519+
520+
_, _, _, err := parseBlockBinary(buf.Bytes())
521+
if err == nil {
522+
t.Fatal("expected error for oversized size bytes varint")
523+
}
524+
if !strings.Contains(err.Error(), "block size") {
525+
t.Errorf("expected error mentioning block size, got: %v", err)
526+
}
527+
}

0 commit comments

Comments
 (0)