bsv-blockchain · galt-tr · May 27, 2026 · May 3, 2026 · May 27, 2026 · May 27, 2026
@@ -32,6 +32,33 @@ const DefaultMaxBlockBytes int64 = 1 * 1024 * 1024 * 1024 // 1 GiB
 // stops a hostile server from inflating arcade's log lines.
 const maxErrorBodyBytes int64 = 512
 
+// maxSubtreeCount caps the number of subtree hashes parseBlockBinary will
+// preallocate before reading them from the response. PR#107 / F-007 caps the
+// total response body at DefaultMaxBlockBytes (1 GiB), and each subtree hash
+// is exactly 32 bytes on the wire, so the absolute upper bound implied by the
+// body cap is DefaultMaxBlockBytes/32 ≈ 33.5 million entries. We pick a
+// round, well-under-the-ceiling cap of 10 million, which is comfortably above
+// any plausible Teranode block (millions of subtrees) while preventing a
+// hostile DataHub from pushing a varint of, say, 2^60 and forcing a
+// multi-petabyte preallocation. See finding F-008.
+const maxSubtreeCount uint64 = 10_000_000
+
+// maxTxCount caps the txCount metadata varint that prefixes the binary block
+// payload. The /block endpoint records the block's total transaction count;
+// even Teranode-class blocks stay well under a billion, so 1e9 is comfortable
+// headroom and rejects obviously-bogus 2^60-style varints. The value is
+// currently only consumed to advance the reader cursor, but bounding it now
+// prevents a future refactor from introducing an allocation path on
+// untrusted input and surfaces malformed responses earlier. See F-008.
+const maxTxCount uint64 = 1_000_000_000
+
+// maxBlockSizeBytes caps the sizeBytes metadata varint. The /block endpoint
+// records the total on-chain block size including subtree files served
+// separately, so this is a logical block-size limit (1 TiB) rather than a
+// response-body limit — the response body itself is enforced by
+// DefaultMaxBlockBytes. Defense-in-depth rationale matches maxTxCount.
+const maxBlockSizeBytes uint64 = 1 << 40
+
 // BlockDataValidator is an optional response-acceptance predicate run after a
 // successful datahub fetch. Returning a non-nil error causes the fetch loop
 // to discard that peer's response (logging the reason into urlErrors) and try
@@ -271,19 +298,38 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e
 	if _, rErr := txCount.ReadFrom(r); rErr != nil {
 		return nil, nil, nil, fmt.Errorf("failed to read transaction count: %w", rErr)
 	}
+	if uint64(txCount) > maxTxCount {
+		return nil, nil, nil, fmt.Errorf("tx count %d exceeds maximum of %d", uint64(txCount), maxTxCount)
+	}
 
 	// Read size in bytes (varint)
 	var sizeBytes util.VarInt
 	if _, rErr := sizeBytes.ReadFrom(r); rErr != nil {
 		return nil, nil, nil, fmt.Errorf("failed to read size in bytes: %w", rErr)
 	}
+	if uint64(sizeBytes) > maxBlockSizeBytes {
+		return nil, nil, nil, fmt.Errorf("block size %d exceeds maximum of %d", uint64(sizeBytes), maxBlockSizeBytes)
+	}
 
 	// Read subtree count (varint)
 	var subtreeCount util.VarInt
 	if _, rErr := subtreeCount.ReadFrom(r); rErr != nil {
 		return nil, nil, nil, fmt.Errorf("failed to read subtree count: %w", rErr)
 	}
 
+	// Reject implausible subtree counts before allocating. A hostile or
+	// buggy DataHub could otherwise send a 9-byte varint encoding ~2^64-1
+	// and force a multi-petabyte preallocation. A count that cannot
+	// physically fit in the remaining body (32 bytes per hash) is also
+	// rejected so we never allocate space we are guaranteed never to fill.
+	// See finding F-008.
+	if uint64(subtreeCount) > maxSubtreeCount {
+		return nil, nil, nil, fmt.Errorf("subtree count %d exceeds maximum of %d", uint64(subtreeCount), maxSubtreeCount)
+	}
+	if uint64(subtreeCount) > uint64(r.Len()/32) { //nolint:gosec // bytes.Reader.Len() is always non-negative (remaining unread bytes)
+		return nil, nil, nil, fmt.Errorf("subtree count %d exceeds remaining body capacity (%d bytes for %d-byte hashes)", uint64(subtreeCount), r.Len(), 32)
+	}
+
 	// Read subtree hashes (32 bytes each)
 	hashes := make([]chainhash.Hash, 0, uint64(subtreeCount))
 	hashBuf := make([]byte, 32)
@@ -324,6 +370,18 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e
 		return hashes, nil, headerMerkleRoot, nil
 	}
 
+	// Reject coinbase BUMP lengths that cannot physically fit in the
+	// remaining response body before allocating. The body itself was
+	// already capped by FetchBlockDataForBUMPWithCap, but the in-band
+	// varint is still untrusted, so a 2^64-sized cbBUMPLen would force an
+	// enormous preallocation here without this check. We treat oversize
+	// lengths as "no coinbase BUMP available" — matching the existing
+	// best-effort posture of the coinbase-tail parsing below the subtree
+	// hashes. See finding F-008.
+	if uint64(cbBUMPLen) > uint64(r.Len()) { //nolint:gosec // bytes.Reader.Len() is always non-negative (remaining unread bytes)
+		return hashes, nil, headerMerkleRoot, nil
+	}
+
 	// Read coinbase BUMP data
 	coinbaseBUMP := make([]byte, uint64(cbBUMPLen))
 	if _, err := io.ReadFull(r, coinbaseBUMP); err != nil {

@@ -1,7 +1,9 @@
 package bump
 
 import (
+	"bytes"
 	"context"
+	"encoding/binary"
 	"net/http"
 	"net/http/httptest"
 	"strconv"
@@ -346,3 +348,180 @@ func TestFetchBlockDataForBUMP_ZeroCapUsesDefault(t *testing.T) {
 		t.Fatalf("negative cap should select the default and succeed, got: %v", err)
 	}
 }
+
+// --- Untrusted-varint allocation tests (F-008) ---------------------------
+
+// blockBytesWithSubtreeCountVarint builds a binary block payload with the
+// supplied subtreeCount written verbatim as a 9-byte 0xFF varint. The body
+// itself is short, so a parser that allocates based on the varint will trip
+// long before it tries to fill the buffer.
+func blockBytesWithSubtreeCountVarint(t *testing.T, subtreeCount uint64) []byte {
+	t.Helper()
+	var buf bytes.Buffer
+	buf.Write(make([]byte, 80)) // header (zeroed; merkle-root only needs 32 bytes)
+	buf.WriteByte(0x00)         // txCount = 0
+	buf.WriteByte(0x00)         // sizeBytes = 0
+	// subtreeCount as 0xFF + uint64 LE — the largest VarInt encoding form,
+	// so we can dial in any uint64 value the test wants.
+	buf.WriteByte(0xff)
+	var le [8]byte
+	binary.LittleEndian.PutUint64(le[:], subtreeCount)
+	buf.Write(le[:])
+	return buf.Bytes()
+}
+
+// TestFetchBlockDataForBUMP_RejectsHugeSubtreeCount verifies that a varint
+// claiming far more subtrees than maxSubtreeCount is rejected without
+// attempting a giant preallocation. We run the test through the public
+// fetcher to also exercise the body-cap and HTTP plumbing.
+func TestFetchBlockDataForBUMP_RejectsHugeSubtreeCount(t *testing.T) {
+	body := blockBytesWithSubtreeCountVarint(t, 1<<60) // ~1.15 quintillion
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write(body)
+	}))
+	t.Cleanup(srv.Close)
+
+	_, _, _, err := FetchBlockDataForBUMP(
+		context.Background(),
+		[]string{srv.URL},
+		"deadbeef",
+		zap.NewNop(),
+	)
+	if err == nil {
+		t.Fatal("expected error for oversized subtree count varint")
+	}
+	if !strings.Contains(err.Error(), "subtree count") {
+		t.Errorf("expected error mentioning subtree count, got: %v", err)
+	}
+}
+
+// TestParseBlockBinary_RejectsSubtreeCountAboveBodyCapacity verifies that a
+// subtreeCount which is below maxSubtreeCount but still cannot fit in the
+// remaining body bytes is rejected before the make() call. This catches
+// "plausible-but-impossible" counts that would otherwise allocate hundreds
+// of MiB for a body only kilobytes long.
+func TestParseBlockBinary_RejectsSubtreeCountAboveBodyCapacity(t *testing.T) {
+	// 1,000,000 < maxSubtreeCount (10,000,000) so the absolute cap passes,
+	// but the body has zero bytes after the varint, so the body-capacity
+	// check must reject.
+	body := blockBytesWithSubtreeCountVarint(t, 1_000_000)
+	_, _, _, err := parseBlockBinary(body)
+	if err == nil {
+		t.Fatal("expected error for subtree count exceeding body capacity")
+	}
+	if !strings.Contains(err.Error(), "remaining body capacity") {
+		t.Errorf("expected body-capacity error, got: %v", err)
+	}
+}
+
+// TestParseBlockBinary_AcceptsZeroSubtreeCount keeps the boundary case
+// covered: an empty subtree list must continue to parse successfully and
+// return zero hashes.
+func TestParseBlockBinary_AcceptsZeroSubtreeCount(t *testing.T) {
+	body := minimalValidBlockBytes(t)
+	hashes, _, root, err := parseBlockBinary(body)
+	if err != nil {
+		t.Fatalf("expected success for zero subtree count, got: %v", err)
+	}
+	if len(hashes) != 0 {
+		t.Errorf("expected 0 hashes, got %d", len(hashes))
+	}
+	if root == nil {
+		t.Errorf("expected non-nil header merkle root")
+	}
+}
+
+// TestParseBlockBinary_RejectsCoinbaseBUMPLengthAboveBodyCapacity covers the
+// sibling unbounded allocation: cbBUMPLen is a varint and was previously
+// fed straight into make([]byte, ...). We construct a payload with a valid
+// header + zero subtree hashes + minimal coinbase tx, then a bogus 2^60
+// cbBUMPLen, and confirm the parser short-circuits to "no coinbase BUMP"
+// instead of allocating an exabyte of memory.
+func TestParseBlockBinary_RejectsCoinbaseBUMPLengthAboveBodyCapacity(t *testing.T) {
+	var buf bytes.Buffer
+	buf.Write(make([]byte, 80)) // header
+	buf.WriteByte(0x00)         // txCount = 0
+	buf.WriteByte(0x00)         // sizeBytes = 0
+	buf.WriteByte(0x00)         // subtreeCount = 0
+	// Minimal-ish coinbase tx: version (4) | inCount=1 | prev hash (32) |
+	// prev index (4) | scriptLen=0 | sequence (4) | outCount=0 | locktime (4).
+	// The bsv-sdk tx parser is happy with this skeleton even though it would
+	// be rejected by consensus — we only need txBytesUsed to advance.
+	tx := make([]byte, 0, 64)
+	tx = append(tx, 0x01, 0x00, 0x00, 0x00) // version
+	tx = append(tx, 0x01)                   // inCount = 1
+	tx = append(tx, make([]byte, 32)...)    // prev hash
+	tx = append(tx, 0xff, 0xff, 0xff, 0xff) // prev index
+	tx = append(tx, 0x00)                   // scriptLen = 0
+	tx = append(tx, 0xff, 0xff, 0xff, 0xff) // sequence
+	tx = append(tx, 0x00)                   // outCount = 0
+	tx = append(tx, 0x00, 0x00, 0x00, 0x00) // locktime
+	buf.Write(tx)
+	buf.WriteByte(0x00) // blockHeight varint = 0
+	// cbBUMPLen as a 0xFF varint with a wildly oversized value.
+	buf.WriteByte(0xff)
+	var le [8]byte
+	binary.LittleEndian.PutUint64(le[:], 1<<60)
+	buf.Write(le[:])
+
+	hashes, cb, root, err := parseBlockBinary(buf.Bytes())
+	if err != nil {
+		t.Fatalf("parser should not error on bogus cbBUMPLen, got: %v", err)
+	}
+	if cb != nil {
+		t.Errorf("expected nil coinbase BUMP for oversize cbBUMPLen, got %d bytes", len(cb))
+	}
+	if len(hashes) != 0 {
+		t.Errorf("expected 0 hashes, got %d", len(hashes))
+	}
+	if root == nil {
+		t.Errorf("expected non-nil header merkle root")
+	}
+}
+
+// TestParseBlockBinary_RejectsHugeTxCount verifies that a txCount varint
+// claiming far more transactions than maxTxCount is rejected before the
+// parser advances. txCount is not currently used for allocation, but bounding
+// it is defense-in-depth against future refactors that begin to use the
+// value and surfaces bogus payloads earlier.
+func TestParseBlockBinary_RejectsHugeTxCount(t *testing.T) {
+	var buf bytes.Buffer
+	buf.Write(make([]byte, 80)) // header (zeroed)
+	// txCount as 0xFF + uint64 LE — the largest VarInt encoding form.
+	buf.WriteByte(0xff)
+	var le [8]byte
+	binary.LittleEndian.PutUint64(le[:], 1<<60)
+	buf.Write(le[:])
+
+	_, _, _, err := parseBlockBinary(buf.Bytes())
+	if err == nil {
+		t.Fatal("expected error for oversized tx count varint")
+	}
+	if !strings.Contains(err.Error(), "tx count") {
+		t.Errorf("expected error mentioning tx count, got: %v", err)
+	}
+}
+
+// TestParseBlockBinary_RejectsHugeSizeBytes verifies that a sizeBytes varint
+// claiming a block size beyond maxBlockSizeBytes is rejected. Same rationale
+// as TestParseBlockBinary_RejectsHugeTxCount — defense-in-depth.
+func TestParseBlockBinary_RejectsHugeSizeBytes(t *testing.T) {
+	var buf bytes.Buffer
+	buf.Write(make([]byte, 80)) // header (zeroed)
+	buf.WriteByte(0x00)         // txCount = 0
+	// sizeBytes as 0xFF + uint64 LE — the largest VarInt encoding form.
+	buf.WriteByte(0xff)
+	var le [8]byte
+	binary.LittleEndian.PutUint64(le[:], 1<<60)
+	buf.Write(le[:])
+
+	_, _, _, err := parseBlockBinary(buf.Bytes())
+	if err == nil {
+		t.Fatal("expected error for oversized size bytes varint")
+	}
+	if !strings.Contains(err.Error(), "block size") {
+		t.Errorf("expected error mentioning block size, got: %v", err)
+	}
+}