diff --git a/bump/datahub.go b/bump/datahub.go index c440eb0..ff2fcb5 100644 --- a/bump/datahub.go +++ b/bump/datahub.go @@ -32,6 +32,33 @@ const DefaultMaxBlockBytes int64 = 1 * 1024 * 1024 * 1024 // 1 GiB // stops a hostile server from inflating arcade's log lines. const maxErrorBodyBytes int64 = 512 +// maxSubtreeCount caps the number of subtree hashes parseBlockBinary will +// preallocate before reading them from the response. PR#107 / F-007 caps the +// total response body at DefaultMaxBlockBytes (1 GiB), and each subtree hash +// is exactly 32 bytes on the wire, so the absolute upper bound implied by the +// body cap is DefaultMaxBlockBytes/32 ≈ 33.5 million entries. We pick a +// round, well-under-the-ceiling cap of 10 million, which is comfortably above +// any plausible Teranode block (millions of subtrees) while preventing a +// hostile DataHub from pushing a varint of, say, 2^60 and forcing a +// multi-petabyte preallocation. See finding F-008. +const maxSubtreeCount uint64 = 10_000_000 + +// maxTxCount caps the txCount metadata varint that prefixes the binary block +// payload. The /block endpoint records the block's total transaction count; +// even Teranode-class blocks stay well under a billion, so 1e9 is comfortable +// headroom and rejects obviously-bogus 2^60-style varints. The value is +// currently only consumed to advance the reader cursor, but bounding it now +// prevents a future refactor from introducing an allocation path on +// untrusted input and surfaces malformed responses earlier. See F-008. +const maxTxCount uint64 = 1_000_000_000 + +// maxBlockSizeBytes caps the sizeBytes metadata varint. The /block endpoint +// records the total on-chain block size including subtree files served +// separately, so this is a logical block-size limit (1 TiB) rather than a +// response-body limit — the response body itself is enforced by +// DefaultMaxBlockBytes. Defense-in-depth rationale matches maxTxCount. +const maxBlockSizeBytes uint64 = 1 << 40 + // BlockDataValidator is an optional response-acceptance predicate run after a // successful datahub fetch. Returning a non-nil error causes the fetch loop // to discard that peer's response (logging the reason into urlErrors) and try @@ -271,12 +298,18 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e if _, rErr := txCount.ReadFrom(r); rErr != nil { return nil, nil, nil, fmt.Errorf("failed to read transaction count: %w", rErr) } + if uint64(txCount) > maxTxCount { + return nil, nil, nil, fmt.Errorf("tx count %d exceeds maximum of %d", uint64(txCount), maxTxCount) + } // Read size in bytes (varint) var sizeBytes util.VarInt if _, rErr := sizeBytes.ReadFrom(r); rErr != nil { return nil, nil, nil, fmt.Errorf("failed to read size in bytes: %w", rErr) } + if uint64(sizeBytes) > maxBlockSizeBytes { + return nil, nil, nil, fmt.Errorf("block size %d exceeds maximum of %d", uint64(sizeBytes), maxBlockSizeBytes) + } // Read subtree count (varint) var subtreeCount util.VarInt @@ -284,6 +317,19 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e return nil, nil, nil, fmt.Errorf("failed to read subtree count: %w", rErr) } + // Reject implausible subtree counts before allocating. A hostile or + // buggy DataHub could otherwise send a 9-byte varint encoding ~2^64-1 + // and force a multi-petabyte preallocation. A count that cannot + // physically fit in the remaining body (32 bytes per hash) is also + // rejected so we never allocate space we are guaranteed never to fill. + // See finding F-008. + if uint64(subtreeCount) > maxSubtreeCount { + return nil, nil, nil, fmt.Errorf("subtree count %d exceeds maximum of %d", uint64(subtreeCount), maxSubtreeCount) + } + if uint64(subtreeCount) > uint64(r.Len()/32) { //nolint:gosec // bytes.Reader.Len() is always non-negative (remaining unread bytes) + return nil, nil, nil, fmt.Errorf("subtree count %d exceeds remaining body capacity (%d bytes for %d-byte hashes)", uint64(subtreeCount), r.Len(), 32) + } + // Read subtree hashes (32 bytes each) hashes := make([]chainhash.Hash, 0, uint64(subtreeCount)) hashBuf := make([]byte, 32) @@ -324,6 +370,18 @@ func parseBlockBinary(data []byte) ([]chainhash.Hash, []byte, *chainhash.Hash, e return hashes, nil, headerMerkleRoot, nil } + // Reject coinbase BUMP lengths that cannot physically fit in the + // remaining response body before allocating. The body itself was + // already capped by FetchBlockDataForBUMPWithCap, but the in-band + // varint is still untrusted, so a 2^64-sized cbBUMPLen would force an + // enormous preallocation here without this check. We treat oversize + // lengths as "no coinbase BUMP available" — matching the existing + // best-effort posture of the coinbase-tail parsing below the subtree + // hashes. See finding F-008. + if uint64(cbBUMPLen) > uint64(r.Len()) { //nolint:gosec // bytes.Reader.Len() is always non-negative (remaining unread bytes) + return hashes, nil, headerMerkleRoot, nil + } + // Read coinbase BUMP data coinbaseBUMP := make([]byte, uint64(cbBUMPLen)) if _, err := io.ReadFull(r, coinbaseBUMP); err != nil { diff --git a/bump/datahub_test.go b/bump/datahub_test.go index c0aa688..92d100d 100644 --- a/bump/datahub_test.go +++ b/bump/datahub_test.go @@ -1,7 +1,9 @@ package bump import ( + "bytes" "context" + "encoding/binary" "net/http" "net/http/httptest" "strconv" @@ -346,3 +348,180 @@ func TestFetchBlockDataForBUMP_ZeroCapUsesDefault(t *testing.T) { t.Fatalf("negative cap should select the default and succeed, got: %v", err) } } + +// --- Untrusted-varint allocation tests (F-008) --------------------------- + +// blockBytesWithSubtreeCountVarint builds a binary block payload with the +// supplied subtreeCount written verbatim as a 9-byte 0xFF varint. The body +// itself is short, so a parser that allocates based on the varint will trip +// long before it tries to fill the buffer. +func blockBytesWithSubtreeCountVarint(t *testing.T, subtreeCount uint64) []byte { + t.Helper() + var buf bytes.Buffer + buf.Write(make([]byte, 80)) // header (zeroed; merkle-root only needs 32 bytes) + buf.WriteByte(0x00) // txCount = 0 + buf.WriteByte(0x00) // sizeBytes = 0 + // subtreeCount as 0xFF + uint64 LE — the largest VarInt encoding form, + // so we can dial in any uint64 value the test wants. + buf.WriteByte(0xff) + var le [8]byte + binary.LittleEndian.PutUint64(le[:], subtreeCount) + buf.Write(le[:]) + return buf.Bytes() +} + +// TestFetchBlockDataForBUMP_RejectsHugeSubtreeCount verifies that a varint +// claiming far more subtrees than maxSubtreeCount is rejected without +// attempting a giant preallocation. We run the test through the public +// fetcher to also exercise the body-cap and HTTP plumbing. +func TestFetchBlockDataForBUMP_RejectsHugeSubtreeCount(t *testing.T) { + body := blockBytesWithSubtreeCountVarint(t, 1<<60) // ~1.15 quintillion + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write(body) + })) + t.Cleanup(srv.Close) + + _, _, _, err := FetchBlockDataForBUMP( + context.Background(), + []string{srv.URL}, + "deadbeef", + zap.NewNop(), + ) + if err == nil { + t.Fatal("expected error for oversized subtree count varint") + } + if !strings.Contains(err.Error(), "subtree count") { + t.Errorf("expected error mentioning subtree count, got: %v", err) + } +} + +// TestParseBlockBinary_RejectsSubtreeCountAboveBodyCapacity verifies that a +// subtreeCount which is below maxSubtreeCount but still cannot fit in the +// remaining body bytes is rejected before the make() call. This catches +// "plausible-but-impossible" counts that would otherwise allocate hundreds +// of MiB for a body only kilobytes long. +func TestParseBlockBinary_RejectsSubtreeCountAboveBodyCapacity(t *testing.T) { + // 1,000,000 < maxSubtreeCount (10,000,000) so the absolute cap passes, + // but the body has zero bytes after the varint, so the body-capacity + // check must reject. + body := blockBytesWithSubtreeCountVarint(t, 1_000_000) + _, _, _, err := parseBlockBinary(body) + if err == nil { + t.Fatal("expected error for subtree count exceeding body capacity") + } + if !strings.Contains(err.Error(), "remaining body capacity") { + t.Errorf("expected body-capacity error, got: %v", err) + } +} + +// TestParseBlockBinary_AcceptsZeroSubtreeCount keeps the boundary case +// covered: an empty subtree list must continue to parse successfully and +// return zero hashes. +func TestParseBlockBinary_AcceptsZeroSubtreeCount(t *testing.T) { + body := minimalValidBlockBytes(t) + hashes, _, root, err := parseBlockBinary(body) + if err != nil { + t.Fatalf("expected success for zero subtree count, got: %v", err) + } + if len(hashes) != 0 { + t.Errorf("expected 0 hashes, got %d", len(hashes)) + } + if root == nil { + t.Errorf("expected non-nil header merkle root") + } +} + +// TestParseBlockBinary_RejectsCoinbaseBUMPLengthAboveBodyCapacity covers the +// sibling unbounded allocation: cbBUMPLen is a varint and was previously +// fed straight into make([]byte, ...). We construct a payload with a valid +// header + zero subtree hashes + minimal coinbase tx, then a bogus 2^60 +// cbBUMPLen, and confirm the parser short-circuits to "no coinbase BUMP" +// instead of allocating an exabyte of memory. +func TestParseBlockBinary_RejectsCoinbaseBUMPLengthAboveBodyCapacity(t *testing.T) { + var buf bytes.Buffer + buf.Write(make([]byte, 80)) // header + buf.WriteByte(0x00) // txCount = 0 + buf.WriteByte(0x00) // sizeBytes = 0 + buf.WriteByte(0x00) // subtreeCount = 0 + // Minimal-ish coinbase tx: version (4) | inCount=1 | prev hash (32) | + // prev index (4) | scriptLen=0 | sequence (4) | outCount=0 | locktime (4). + // The bsv-sdk tx parser is happy with this skeleton even though it would + // be rejected by consensus — we only need txBytesUsed to advance. + tx := make([]byte, 0, 64) + tx = append(tx, 0x01, 0x00, 0x00, 0x00) // version + tx = append(tx, 0x01) // inCount = 1 + tx = append(tx, make([]byte, 32)...) // prev hash + tx = append(tx, 0xff, 0xff, 0xff, 0xff) // prev index + tx = append(tx, 0x00) // scriptLen = 0 + tx = append(tx, 0xff, 0xff, 0xff, 0xff) // sequence + tx = append(tx, 0x00) // outCount = 0 + tx = append(tx, 0x00, 0x00, 0x00, 0x00) // locktime + buf.Write(tx) + buf.WriteByte(0x00) // blockHeight varint = 0 + // cbBUMPLen as a 0xFF varint with a wildly oversized value. + buf.WriteByte(0xff) + var le [8]byte + binary.LittleEndian.PutUint64(le[:], 1<<60) + buf.Write(le[:]) + + hashes, cb, root, err := parseBlockBinary(buf.Bytes()) + if err != nil { + t.Fatalf("parser should not error on bogus cbBUMPLen, got: %v", err) + } + if cb != nil { + t.Errorf("expected nil coinbase BUMP for oversize cbBUMPLen, got %d bytes", len(cb)) + } + if len(hashes) != 0 { + t.Errorf("expected 0 hashes, got %d", len(hashes)) + } + if root == nil { + t.Errorf("expected non-nil header merkle root") + } +} + +// TestParseBlockBinary_RejectsHugeTxCount verifies that a txCount varint +// claiming far more transactions than maxTxCount is rejected before the +// parser advances. txCount is not currently used for allocation, but bounding +// it is defense-in-depth against future refactors that begin to use the +// value and surfaces bogus payloads earlier. +func TestParseBlockBinary_RejectsHugeTxCount(t *testing.T) { + var buf bytes.Buffer + buf.Write(make([]byte, 80)) // header (zeroed) + // txCount as 0xFF + uint64 LE — the largest VarInt encoding form. + buf.WriteByte(0xff) + var le [8]byte + binary.LittleEndian.PutUint64(le[:], 1<<60) + buf.Write(le[:]) + + _, _, _, err := parseBlockBinary(buf.Bytes()) + if err == nil { + t.Fatal("expected error for oversized tx count varint") + } + if !strings.Contains(err.Error(), "tx count") { + t.Errorf("expected error mentioning tx count, got: %v", err) + } +} + +// TestParseBlockBinary_RejectsHugeSizeBytes verifies that a sizeBytes varint +// claiming a block size beyond maxBlockSizeBytes is rejected. Same rationale +// as TestParseBlockBinary_RejectsHugeTxCount — defense-in-depth. +func TestParseBlockBinary_RejectsHugeSizeBytes(t *testing.T) { + var buf bytes.Buffer + buf.Write(make([]byte, 80)) // header (zeroed) + buf.WriteByte(0x00) // txCount = 0 + // sizeBytes as 0xFF + uint64 LE — the largest VarInt encoding form. + buf.WriteByte(0xff) + var le [8]byte + binary.LittleEndian.PutUint64(le[:], 1<<60) + buf.Write(le[:]) + + _, _, _, err := parseBlockBinary(buf.Bytes()) + if err == nil { + t.Fatal("expected error for oversized size bytes varint") + } + if !strings.Contains(err.Error(), "block size") { + t.Errorf("expected error mentioning block size, got: %v", err) + } +}