diff --git a/android/guava-tests/test/com/google/common/hash/BloomFilterTest.java b/android/guava-tests/test/com/google/common/hash/BloomFilterTest.java
index 80e80e7860b0..e71003b273cc 100644
--- a/android/guava-tests/test/com/google/common/hash/BloomFilterTest.java
+++ b/android/guava-tests/test/com/google/common/hash/BloomFilterTest.java
@@ -353,6 +353,47 @@ public void testBitSize() {
     }
   }
 
+  /**
+   * Tests that bitSize() can be used to predict the serialization size produced by writeTo().
+   *
+   * <p>The serialization format consists of a 6-byte header (1 byte strategy, 1 byte hash
+   * functions, 4 bytes array length) followed by the bit array data (bitSize / 8 bytes).
+   */
+  public void testBitSizeMatchesSerializationSize() throws Exception {
+    int[] expectedInsertionValues = {1, 10, 100, 1000, 10000};
+    double[] fppValues = {0.01, 0.03, 0.1};
+
+    for (int expectedInsertions : expectedInsertionValues) {
+      for (double fpp : fppValues) {
+        BloomFilter<String> bf =
+            BloomFilter.create(Funnels.unencodedCharsFunnel(), expectedInsertions, fpp);
+
+        // Add some elements
+        for (int i = 0; i < expectedInsertions / 2; i++) {
+          bf.put("element" + i);
+        }
+
+        // Calculate expected size based on bitSize()
+        // Header: 1 byte (strategy) + 1 byte (hash functions) + 4 bytes (array length) = 6 bytes
+        // Data: bitSize / 8 bytes
+        long predictedSize = bf.bitSize() / 8 + 6;
+
+        // Serialize and measure actual size
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        bf.writeTo(out);
+        int actualSize = out.size();
+
+        assertEquals(
+            "Serialization size mismatch for expectedInsertions="
+                + expectedInsertions
+                + " fpp="
+                + fpp,
+            predictedSize,
+            actualSize);
+      }
+    }
+  }
+
   public void testApproximateElementCount() {
     int numInsertions = 1000;
     BloomFilter<Integer> bf = BloomFilter.create(Funnels.integerFunnel(), numInsertions);
diff --git a/android/guava/src/com/google/common/hash/BloomFilter.java b/android/guava/src/com/google/common/hash/BloomFilter.java
index 054d0f2ce5db..297502689f7b 100644
--- a/android/guava/src/com/google/common/hash/BloomFilter.java
+++ b/android/guava/src/com/google/common/hash/BloomFilter.java
@@ -220,9 +220,17 @@ public long approximateElementCount() {
         -Math.log1p(-fractionOfBitsSet) * bitSize / numHashFunctions, RoundingMode.HALF_UP);
   }
 
-  /** Returns the number of bits in the underlying bit array. */
-  @VisibleForTesting
-  long bitSize() {
+  /**
+   * Returns the number of bits in the underlying bit array.
+   *
+   * <p>This can be useful when pre-allocating space for serialization. The number of bytes written
+   * by {@link #writeTo(OutputStream)} is {@code bitSize() / 8 + 6} (6 bytes for the header: 1 byte
+   * for the strategy, 1 byte for the number of hash functions, and 4 bytes for the array length).
+   *
+   * @return the number of bits in this Bloom filter's underlying bit array
+   * @since 35.0
+   */
+  public long bitSize() {
     return bits.bitSize();
   }
 
diff --git a/guava-tests/test/com/google/common/hash/BloomFilterTest.java b/guava-tests/test/com/google/common/hash/BloomFilterTest.java
index 665612d6db34..f7780fe2652a 100644
--- a/guava-tests/test/com/google/common/hash/BloomFilterTest.java
+++ b/guava-tests/test/com/google/common/hash/BloomFilterTest.java
@@ -355,6 +355,47 @@ public void testBitSize() {
     }
   }
 
+  /**
+   * Tests that bitSize() can be used to predict the serialization size produced by writeTo().
+   *
+   * <p>The serialization format consists of a 6-byte header (1 byte strategy, 1 byte hash
+   * functions, 4 bytes array length) followed by the bit array data (bitSize / 8 bytes).
+   */
+  public void testBitSizeMatchesSerializationSize() throws Exception {
+    int[] expectedInsertionValues = {1, 10, 100, 1000, 10000};
+    double[] fppValues = {0.01, 0.03, 0.1};
+
+    for (int expectedInsertions : expectedInsertionValues) {
+      for (double fpp : fppValues) {
+        BloomFilter<String> bf =
+            BloomFilter.create(Funnels.unencodedCharsFunnel(), expectedInsertions, fpp);
+
+        // Add some elements
+        for (int i = 0; i < expectedInsertions / 2; i++) {
+          bf.put("element" + i);
+        }
+
+        // Calculate expected size based on bitSize()
+        // Header: 1 byte (strategy) + 1 byte (hash functions) + 4 bytes (array length) = 6 bytes
+        // Data: bitSize / 8 bytes
+        long predictedSize = bf.bitSize() / 8 + 6;
+
+        // Serialize and measure actual size
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        bf.writeTo(out);
+        int actualSize = out.size();
+
+        assertEquals(
+            "Serialization size mismatch for expectedInsertions="
+                + expectedInsertions
+                + " fpp="
+                + fpp,
+            predictedSize,
+            actualSize);
+      }
+    }
+  }
+
   public void testApproximateElementCount() {
     int numInsertions = 1000;
     BloomFilter<Integer> bf = BloomFilter.create(Funnels.integerFunnel(), numInsertions);
diff --git a/guava/src/com/google/common/hash/BloomFilter.java b/guava/src/com/google/common/hash/BloomFilter.java
index 215c191eb70a..4cc2b4a40208 100644
--- a/guava/src/com/google/common/hash/BloomFilter.java
+++ b/guava/src/com/google/common/hash/BloomFilter.java
@@ -232,9 +232,17 @@ public long approximateElementCount() {
         -Math.log1p(-fractionOfBitsSet) * bitSize / numHashFunctions, RoundingMode.HALF_UP);
   }
 
-  /** Returns the number of bits in the underlying bit array. */
-  @VisibleForTesting
-  long bitSize() {
+  /**
+   * Returns the number of bits in the underlying bit array.
+   *
+   * <p>This can be useful when pre-allocating space for serialization. The number of bytes written
+   * by {@link #writeTo(OutputStream)} is {@code bitSize() / 8 + 6} (6 bytes for the header: 1 byte
+   * for the strategy, 1 byte for the number of hash functions, and 4 bytes for the array length).
+   *
+   * @return the number of bits in this Bloom filter's underlying bit array
+   * @since 35.0
+   */
+  public long bitSize() {
     return bits.bitSize();
   }
 