Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions bump/datahub.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,33 @@ const DefaultMaxBlockBytes int64 = 1 * 1024 * 1024 * 1024 // 1 GiB
// stops a hostile server from inflating arcade's log lines.
const maxErrorBodyBytes int64 = 512

// maxSubtreeCount caps the number of subtree hashes parseBlockBinary will
// preallocate before reading them from the response. PR#107 / F-007 caps the
// total response body at DefaultMaxBlockBytes (1 GiB), and each subtree hash
// is exactly 32 bytes on the wire, so the absolute upper bound implied by the
// body cap is DefaultMaxBlockBytes/32 ≈ 33.5 million entries. We pick a
// round, well-under-the-ceiling cap of 10 million, which is comfortably above
// any plausible Teranode block (millions of subtrees) while preventing a
// hostile DataHub from pushing a varint of, say, 2^60 and forcing a
// multi-petabyte preallocation. See finding F-008.
const maxSubtreeCount uint64 = 10_000_000

// maxTxCount caps the txCount metadata varint that prefixes the binary block
// payload. The /block endpoint records the block's total transaction count;
// even Teranode-class blocks stay well under a billion, so 1e9 is comfortable
// headroom and rejects obviously-bogus 2^60-style varints. The value is
// currently only consumed to advance the reader cursor, but bounding it now
// prevents a future refactor from introducing an allocation path on
// untrusted input and surfaces malformed responses earlier. See F-008.
const maxTxCount uint64 = 1_000_000_000

// maxBlockSizeBytes caps the sizeBytes metadata varint. The /block endpoint
// records the total on-chain block size including subtree files served
// separately, so this is a logical block-size limit (1 TiB) rather than a
// response-body limit — the response body itself is enforced by
// DefaultMaxBlockBytes. Defense-in-depth rationale matches maxTxCount.
const maxBlockSizeBytes uint64 = 1 << 40

// BlockDataValidator is an optional response-acceptance predicate run after a
// successful datahub fetch. Returning a non-nil error causes the fetch loop
// to discard that peer's response (logging the reason into urlErrors) and try
Expand Down Expand Up @@ -271,19 +298,38 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e
if _, rErr := txCount.ReadFrom(r); rErr != nil {
return nil, nil, nil, fmt.Errorf("failed to read transaction count: %w", rErr)
}
if uint64(txCount) > maxTxCount {
return nil, nil, nil, fmt.Errorf("tx count %d exceeds maximum of %d", uint64(txCount), maxTxCount)
}

// Read size in bytes (varint)
var sizeBytes util.VarInt
if _, rErr := sizeBytes.ReadFrom(r); rErr != nil {
return nil, nil, nil, fmt.Errorf("failed to read size in bytes: %w", rErr)
}
if uint64(sizeBytes) > maxBlockSizeBytes {
return nil, nil, nil, fmt.Errorf("block size %d exceeds maximum of %d", uint64(sizeBytes), maxBlockSizeBytes)
}

// Read subtree count (varint)
var subtreeCount util.VarInt
if _, rErr := subtreeCount.ReadFrom(r); rErr != nil {
return nil, nil, nil, fmt.Errorf("failed to read subtree count: %w", rErr)
}

// Reject implausible subtree counts before allocating. A hostile or
// buggy DataHub could otherwise send a 9-byte varint encoding ~2^64-1
// and force a multi-petabyte preallocation. A count that cannot
// physically fit in the remaining body (32 bytes per hash) is also
// rejected so we never allocate space we are guaranteed never to fill.
// See finding F-008.
if uint64(subtreeCount) > maxSubtreeCount {
return nil, nil, nil, fmt.Errorf("subtree count %d exceeds maximum of %d", uint64(subtreeCount), maxSubtreeCount)
}
if uint64(subtreeCount) > uint64(r.Len()/32) { //nolint:gosec // bytes.Reader.Len() is always non-negative (remaining unread bytes)
return nil, nil, nil, fmt.Errorf("subtree count %d exceeds remaining body capacity (%d bytes for %d-byte hashes)", uint64(subtreeCount), r.Len(), 32)
}

// Read subtree hashes (32 bytes each)
hashes := make([]chainhash.Hash, 0, uint64(subtreeCount))
hashBuf := make([]byte, 32)
Expand Down Expand Up @@ -324,6 +370,18 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e
return hashes, nil, headerMerkleRoot, nil
}

// Reject coinbase BUMP lengths that cannot physically fit in the
// remaining response body before allocating. The body itself was
// already capped by FetchBlockDataForBUMPWithCap, but the in-band
// varint is still untrusted, so a 2^64-sized cbBUMPLen would force an
// enormous preallocation here without this check. We treat oversize
// lengths as "no coinbase BUMP available" — matching the existing
// best-effort posture of the coinbase-tail parsing below the subtree
// hashes. See finding F-008.
if uint64(cbBUMPLen) > uint64(r.Len()) { //nolint:gosec // bytes.Reader.Len() is always non-negative (remaining unread bytes)
return hashes, nil, headerMerkleRoot, nil
}

// Read coinbase BUMP data
coinbaseBUMP := make([]byte, uint64(cbBUMPLen))
if _, err := io.ReadFull(r, coinbaseBUMP); err != nil {
Expand Down
179 changes: 179 additions & 0 deletions bump/datahub_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package bump

import (
"bytes"
"context"
"encoding/binary"
"net/http"
"net/http/httptest"
"strconv"
Expand Down Expand Up @@ -346,3 +348,180 @@ func TestFetchBlockDataForBUMP_ZeroCapUsesDefault(t *testing.T) {
t.Fatalf("negative cap should select the default and succeed, got: %v", err)
}
}

// --- Untrusted-varint allocation tests (F-008) ---------------------------

// blockBytesWithSubtreeCountVarint builds a binary block payload with the
// supplied subtreeCount written verbatim as a 9-byte 0xFF varint. The body
// itself is short, so a parser that allocates based on the varint will trip
// long before it tries to fill the buffer.
func blockBytesWithSubtreeCountVarint(t *testing.T, subtreeCount uint64) []byte {
t.Helper()
var buf bytes.Buffer
buf.Write(make([]byte, 80)) // header (zeroed; merkle-root only needs 32 bytes)
buf.WriteByte(0x00) // txCount = 0
buf.WriteByte(0x00) // sizeBytes = 0
// subtreeCount as 0xFF + uint64 LE — the largest VarInt encoding form,
// so we can dial in any uint64 value the test wants.
buf.WriteByte(0xff)
var le [8]byte
binary.LittleEndian.PutUint64(le[:], subtreeCount)
buf.Write(le[:])
return buf.Bytes()
}

// TestFetchBlockDataForBUMP_RejectsHugeSubtreeCount verifies that a varint
// claiming far more subtrees than maxSubtreeCount is rejected without
// attempting a giant preallocation. We run the test through the public
// fetcher to also exercise the body-cap and HTTP plumbing.
func TestFetchBlockDataForBUMP_RejectsHugeSubtreeCount(t *testing.T) {
body := blockBytesWithSubtreeCountVarint(t, 1<<60) // ~1.15 quintillion

srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
_, _ = w.Write(body)
}))
t.Cleanup(srv.Close)

_, _, _, err := FetchBlockDataForBUMP(
context.Background(),
[]string{srv.URL},
"deadbeef",
zap.NewNop(),
)
if err == nil {
t.Fatal("expected error for oversized subtree count varint")
}
if !strings.Contains(err.Error(), "subtree count") {
t.Errorf("expected error mentioning subtree count, got: %v", err)
}
}

// TestParseBlockBinary_RejectsSubtreeCountAboveBodyCapacity verifies that a
// subtreeCount which is below maxSubtreeCount but still cannot fit in the
// remaining body bytes is rejected before the make() call. This catches
// "plausible-but-impossible" counts that would otherwise allocate hundreds
// of MiB for a body only kilobytes long.
func TestParseBlockBinary_RejectsSubtreeCountAboveBodyCapacity(t *testing.T) {
// 1,000,000 < maxSubtreeCount (10,000,000) so the absolute cap passes,
// but the body has zero bytes after the varint, so the body-capacity
// check must reject.
body := blockBytesWithSubtreeCountVarint(t, 1_000_000)
_, _, _, err := parseBlockBinary(body)
if err == nil {
t.Fatal("expected error for subtree count exceeding body capacity")
}
if !strings.Contains(err.Error(), "remaining body capacity") {
t.Errorf("expected body-capacity error, got: %v", err)
}
}

// TestParseBlockBinary_AcceptsZeroSubtreeCount keeps the boundary case
// covered: an empty subtree list must continue to parse successfully and
// return zero hashes.
func TestParseBlockBinary_AcceptsZeroSubtreeCount(t *testing.T) {
body := minimalValidBlockBytes(t)
hashes, _, root, err := parseBlockBinary(body)
if err != nil {
t.Fatalf("expected success for zero subtree count, got: %v", err)
}
if len(hashes) != 0 {
t.Errorf("expected 0 hashes, got %d", len(hashes))
}
if root == nil {
t.Errorf("expected non-nil header merkle root")
}
}

// TestParseBlockBinary_RejectsCoinbaseBUMPLengthAboveBodyCapacity covers the
// sibling unbounded allocation: cbBUMPLen is a varint and was previously
// fed straight into make([]byte, ...). We construct a payload with a valid
// header + zero subtree hashes + minimal coinbase tx, then a bogus 2^60
// cbBUMPLen, and confirm the parser short-circuits to "no coinbase BUMP"
// instead of allocating an exabyte of memory.
func TestParseBlockBinary_RejectsCoinbaseBUMPLengthAboveBodyCapacity(t *testing.T) {
var buf bytes.Buffer
buf.Write(make([]byte, 80)) // header
buf.WriteByte(0x00) // txCount = 0
buf.WriteByte(0x00) // sizeBytes = 0
buf.WriteByte(0x00) // subtreeCount = 0
// Minimal-ish coinbase tx: version (4) | inCount=1 | prev hash (32) |
// prev index (4) | scriptLen=0 | sequence (4) | outCount=0 | locktime (4).
// The bsv-sdk tx parser is happy with this skeleton even though it would
// be rejected by consensus — we only need txBytesUsed to advance.
tx := make([]byte, 0, 64)
tx = append(tx, 0x01, 0x00, 0x00, 0x00) // version
tx = append(tx, 0x01) // inCount = 1
tx = append(tx, make([]byte, 32)...) // prev hash
tx = append(tx, 0xff, 0xff, 0xff, 0xff) // prev index
tx = append(tx, 0x00) // scriptLen = 0
tx = append(tx, 0xff, 0xff, 0xff, 0xff) // sequence
tx = append(tx, 0x00) // outCount = 0
tx = append(tx, 0x00, 0x00, 0x00, 0x00) // locktime
buf.Write(tx)
buf.WriteByte(0x00) // blockHeight varint = 0
// cbBUMPLen as a 0xFF varint with a wildly oversized value.
buf.WriteByte(0xff)
var le [8]byte
binary.LittleEndian.PutUint64(le[:], 1<<60)
buf.Write(le[:])

hashes, cb, root, err := parseBlockBinary(buf.Bytes())
if err != nil {
t.Fatalf("parser should not error on bogus cbBUMPLen, got: %v", err)
}
if cb != nil {
t.Errorf("expected nil coinbase BUMP for oversize cbBUMPLen, got %d bytes", len(cb))
}
if len(hashes) != 0 {
t.Errorf("expected 0 hashes, got %d", len(hashes))
}
if root == nil {
t.Errorf("expected non-nil header merkle root")
}
}

// TestParseBlockBinary_RejectsHugeTxCount verifies that a txCount varint
// claiming far more transactions than maxTxCount is rejected before the
// parser advances. txCount is not currently used for allocation, but bounding
// it is defense-in-depth against future refactors that begin to use the
// value and surfaces bogus payloads earlier.
func TestParseBlockBinary_RejectsHugeTxCount(t *testing.T) {
var buf bytes.Buffer
buf.Write(make([]byte, 80)) // header (zeroed)
// txCount as 0xFF + uint64 LE — the largest VarInt encoding form.
buf.WriteByte(0xff)
var le [8]byte
binary.LittleEndian.PutUint64(le[:], 1<<60)
buf.Write(le[:])

_, _, _, err := parseBlockBinary(buf.Bytes())
if err == nil {
t.Fatal("expected error for oversized tx count varint")
}
if !strings.Contains(err.Error(), "tx count") {
t.Errorf("expected error mentioning tx count, got: %v", err)
}
}

// TestParseBlockBinary_RejectsHugeSizeBytes verifies that a sizeBytes varint
// claiming a block size beyond maxBlockSizeBytes is rejected. Same rationale
// as TestParseBlockBinary_RejectsHugeTxCount — defense-in-depth.
func TestParseBlockBinary_RejectsHugeSizeBytes(t *testing.T) {
var buf bytes.Buffer
buf.Write(make([]byte, 80)) // header (zeroed)
buf.WriteByte(0x00) // txCount = 0
// sizeBytes as 0xFF + uint64 LE — the largest VarInt encoding form.
buf.WriteByte(0xff)
var le [8]byte
binary.LittleEndian.PutUint64(le[:], 1<<60)
buf.Write(le[:])

_, _, _, err := parseBlockBinary(buf.Bytes())
if err == nil {
t.Fatal("expected error for oversized size bytes varint")
}
if !strings.Contains(err.Error(), "block size") {
t.Errorf("expected error mentioning block size, got: %v", err)
}
}
Loading