|
30 | 30 |
|
31 | 31 | /** |
32 | 32 | * Tests for {@link BinaryRowSerializer#deserialize(BinaryRow, org.apache.paimon.io.DataInputView)}, |
33 | | - * focusing on the REUSE_SHRINK_THRESHOLD behavior. |
| 33 | + * focusing on the combined size-cap and buffer-to-record-ratio shrink behavior. |
34 | 34 | */ |
35 | 35 | class BinaryRowSerializerShrinkTest { |
36 | 36 |
|
37 | | - private static final int SHRINK_THRESHOLD = 4 * 1024 * 1024; // 4MB |
| 37 | + private static final int MAX_RETAINED = 4 * 1024 * 1024; // 4MB |
38 | 38 |
|
39 | 39 | @Test |
40 | | - void testDeserializeShrinksOversizedReuseBuffer() throws Exception { |
| 40 | + void testShrinksWhenSpikeFollowedBySmallRecord() throws Exception { |
41 | 41 | BinaryRowSerializer serializer = new BinaryRowSerializer(1); |
42 | 42 |
|
43 | | - // Serialize a large record (> 4MB) |
| 43 | + // Inflate buffer with a large record (> 4MB) |
44 | 44 | BinaryRow largeRow = createRowWithPayload(5 * 1024 * 1024); |
45 | 45 | byte[] largeBytes = serializeRow(serializer, largeRow); |
46 | 46 |
|
47 | | - // Deserialize into a fresh reuse row — buffer grows to hold the large record |
48 | 47 | BinaryRow reuse = serializer.createInstance(); |
49 | | - DataInputDeserializer largeInput = new DataInputDeserializer(largeBytes); |
50 | | - reuse = serializer.deserialize(reuse, largeInput); |
| 48 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(largeBytes)); |
51 | 49 | int largeBufferSize = reuse.getSegments()[0].size(); |
52 | 50 | assertThat(largeBufferSize).isGreaterThanOrEqualTo(5 * 1024 * 1024); |
53 | 51 |
|
54 | | - // Serialize a small record |
| 52 | + // Deserialize a small record — buffer > 4MB and buffer-to-record ratio far above 4x → shrink |
55 | 53 | BinaryRow smallRow = createRowWithPayload(100); |
56 | 54 | byte[] smallBytes = serializeRow(serializer, smallRow); |
| 55 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(smallBytes)); |
| 56 | + assertThat(reuse.getSegments()[0].size()).isLessThan(MAX_RETAINED); |
| 57 | + } |
| 58 | + |
| 59 | + @Test |
| 60 | + void testShrinksWhenSpikeFollowedByMediumRecord() throws Exception { |
| 61 | + BinaryRowSerializer serializer = new BinaryRowSerializer(1); |
| 62 | + |
| 63 | + // Inflate buffer with a very large record (100MB) |
| 64 | + BinaryRow hugeRow = createRowWithPayload(100 * 1024 * 1024); |
| 65 | + byte[] hugeBytes = serializeRow(serializer, hugeRow); |
57 | 66 |
|
58 | | - // Deserialize the small record into the same reuse row |
59 | | - // The oversized buffer (> 4MB) should be shrunk to the exact size needed |
60 | | - DataInputDeserializer smallInput = new DataInputDeserializer(smallBytes); |
61 | | - reuse = serializer.deserialize(reuse, smallInput); |
62 | | - int shrunkBufferSize = reuse.getSegments()[0].size(); |
63 | | - assertThat(shrunkBufferSize).isLessThan(SHRINK_THRESHOLD); |
| 67 | + BinaryRow reuse = serializer.createInstance(); |
| 68 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(hugeBytes)); |
| 69 | + int hugeBufferSize = reuse.getSegments()[0].size(); |
| 70 | + assertThat(hugeBufferSize).isGreaterThanOrEqualTo(100 * 1024 * 1024); |
| 71 | + |
| 72 | + // Deserialize a 5MB record — buffer ~100MB, ratio ~20x > 4x → shrink |
| 73 | + BinaryRow mediumRow = createRowWithPayload(5 * 1024 * 1024); |
| 74 | + byte[] mediumBytes = serializeRow(serializer, mediumRow); |
| 75 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(mediumBytes)); |
| 76 | + assertThat(reuse.getSegments()[0].size()).isLessThan(hugeBufferSize); |
64 | 77 | } |
65 | 78 |
|
66 | 79 | @Test |
67 | | - void testDeserializeKeepsSmallReuseBuffer() throws Exception { |
| 80 | + void testRetainsWhenBufferProportionalToRecordSize() throws Exception { |
68 | 81 | BinaryRowSerializer serializer = new BinaryRowSerializer(1); |
69 | 82 |
|
70 | | - // Serialize a small record (< 4MB) |
71 | | - BinaryRow row1 = createRowWithPayload(1024); |
| 83 | + // Inflate buffer with a 5MB record |
| 84 | + BinaryRow row1 = createRowWithPayload(5 * 1024 * 1024); |
72 | 85 | byte[] bytes1 = serializeRow(serializer, row1); |
73 | 86 |
|
74 | 87 | BinaryRow reuse = serializer.createInstance(); |
75 | | - DataInputDeserializer input1 = new DataInputDeserializer(bytes1); |
76 | | - reuse = serializer.deserialize(reuse, input1); |
77 | | - int bufferSize1 = reuse.getSegments()[0].size(); |
| 88 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(bytes1)); |
| 89 | + int bufferAfterFirst = reuse.getSegments()[0].size(); |
| 90 | + assertThat(bufferAfterFirst).isGreaterThan(MAX_RETAINED); |
78 | 91 |
|
79 | | - // Serialize an even smaller record |
80 | | - BinaryRow row2 = createRowWithPayload(100); |
| 92 | + // Deserialize another record just above threshold — ratio ~1.2x < 4x → retain |
| 93 | + BinaryRow row2 = createRowWithPayload(MAX_RETAINED + 100); |
81 | 94 | byte[] bytes2 = serializeRow(serializer, row2); |
82 | | - |
83 | | - // Deserialize — buffer should be reused (not shrunk), since it's < 4MB |
84 | | - DataInputDeserializer input2 = new DataInputDeserializer(bytes2); |
85 | | - reuse = serializer.deserialize(reuse, input2); |
86 | | - int bufferSize2 = reuse.getSegments()[0].size(); |
87 | | - assertThat(bufferSize2).isEqualTo(bufferSize1); |
| 95 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(bytes2)); |
| 96 | + assertThat(reuse.getSegments()[0].size()).isEqualTo(bufferAfterFirst); |
88 | 97 | } |
89 | 98 |
|
90 | 99 | @Test |
91 | | - void testDeserializeRetainsBufferForConsecutiveLargeRecords() throws Exception { |
| 100 | + void testKeepsSmallBuffer() throws Exception { |
92 | 101 | BinaryRowSerializer serializer = new BinaryRowSerializer(1); |
93 | 102 |
|
94 | | - // Serialize a large record (> 4MB) to inflate the buffer |
95 | | - BinaryRow largeRow1 = createRowWithPayload(5 * 1024 * 1024); |
96 | | - byte[] largeBytes1 = serializeRow(serializer, largeRow1); |
| 103 | + BinaryRow row1 = createRowWithPayload(1024); |
| 104 | + byte[] bytes1 = serializeRow(serializer, row1); |
97 | 105 |
|
98 | 106 | BinaryRow reuse = serializer.createInstance(); |
99 | | - DataInputDeserializer input1 = new DataInputDeserializer(largeBytes1); |
100 | | - reuse = serializer.deserialize(reuse, input1); |
101 | | - int bufferAfterFirst = reuse.getSegments()[0].size(); |
102 | | - assertThat(bufferAfterFirst).isGreaterThanOrEqualTo(5 * 1024 * 1024); |
103 | | - |
104 | | - // Deserialize another large record (also > 4MB) |
105 | | - // Hysteresis: buffer should NOT be shrunk because the incoming record is also large |
106 | | - BinaryRow largeRow2 = createRowWithPayload(SHRINK_THRESHOLD + 100); |
107 | | - byte[] largeBytes2 = serializeRow(serializer, largeRow2); |
| 107 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(bytes1)); |
| 108 | + int bufferSize1 = reuse.getSegments()[0].size(); |
108 | 109 |
|
109 | | - DataInputDeserializer input2 = new DataInputDeserializer(largeBytes2); |
110 | | - reuse = serializer.deserialize(reuse, input2); |
111 | | - int bufferAfterSecond = reuse.getSegments()[0].size(); |
112 | | - assertThat(bufferAfterSecond).isEqualTo(bufferAfterFirst); |
| 110 | + // Smaller record — buffer < 4MB, should reuse without shrinking |
| 111 | + BinaryRow row2 = createRowWithPayload(100); |
| 112 | + byte[] bytes2 = serializeRow(serializer, row2); |
| 113 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(bytes2)); |
| 114 | + assertThat(reuse.getSegments()[0].size()).isEqualTo(bufferSize1); |
113 | 115 | } |
114 | 116 |
|
115 | 117 | @Test |
116 | | - void testDeserializeGrowsBufferWhenNeeded() throws Exception { |
| 118 | + void testGrowsBufferWhenNeeded() throws Exception { |
117 | 119 | BinaryRowSerializer serializer = new BinaryRowSerializer(1); |
118 | 120 |
|
119 | | - // Start with a small record |
120 | 121 | BinaryRow smallRow = createRowWithPayload(100); |
121 | 122 | byte[] smallBytes = serializeRow(serializer, smallRow); |
122 | 123 |
|
123 | 124 | BinaryRow reuse = serializer.createInstance(); |
124 | | - DataInputDeserializer smallInput = new DataInputDeserializer(smallBytes); |
125 | | - reuse = serializer.deserialize(reuse, smallInput); |
| 125 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(smallBytes)); |
126 | 126 |
|
127 | | - // Deserialize a larger record — buffer should grow |
| 127 | + // Larger record arrives — buffer must grow |
128 | 128 | BinaryRow largerRow = createRowWithPayload(2048); |
129 | 129 | byte[] largerBytes = serializeRow(serializer, largerRow); |
130 | | - |
131 | | - DataInputDeserializer largerInput = new DataInputDeserializer(largerBytes); |
132 | | - reuse = serializer.deserialize(reuse, largerInput); |
| 130 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(largerBytes)); |
133 | 131 | assertThat(reuse.getSegments()[0].size()).isGreaterThanOrEqualTo(2048); |
134 | 132 | } |
135 | 133 |
|
| 134 | + @Test |
| 135 | + void testRetainsBufferForConsecutiveLargeRecords() throws Exception { |
| 136 | + BinaryRowSerializer serializer = new BinaryRowSerializer(1); |
| 137 | + |
| 138 | + // Inflate buffer with 5MB record |
| 139 | + BinaryRow largeRow1 = createRowWithPayload(5 * 1024 * 1024); |
| 140 | + byte[] largeBytes1 = serializeRow(serializer, largeRow1); |
| 141 | + |
| 142 | + BinaryRow reuse = serializer.createInstance(); |
| 143 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(largeBytes1)); |
| 144 | + int bufferAfterFirst = reuse.getSegments()[0].size(); |
| 145 | + |
| 146 | + // Another 5MB record — ratio ~1x < 4x → retain |
| 147 | + BinaryRow largeRow2 = createRowWithPayload(5 * 1024 * 1024); |
| 148 | + byte[] largeBytes2 = serializeRow(serializer, largeRow2); |
| 149 | + reuse = serializer.deserialize(reuse, new DataInputDeserializer(largeBytes2)); |
| 150 | + assertThat(reuse.getSegments()[0].size()).isEqualTo(bufferAfterFirst); |
| 151 | + } |
| 152 | + |
136 | 153 | private static BinaryRow createRowWithPayload(int payloadSize) { |
137 | 154 | BinaryRow row = new BinaryRow(1); |
138 | 155 | BinaryRowWriter writer = new BinaryRowWriter(row, payloadSize + 32); |
|
0 commit comments