Skip to content

Commit 341dfb4

Browse files
committed
Splitup test
1 parent 3a13583 commit 341dfb4

3 files changed

Lines changed: 641 additions & 449 deletions

File tree

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.parquet.variant;
20+
21+
import java.nio.ByteBuffer;
22+
import java.nio.ByteOrder;
23+
import java.nio.charset.StandardCharsets;
24+
import java.security.SecureRandom;
25+
import java.time.LocalDate;
26+
import java.util.*;
27+
import java.util.function.Consumer;
28+
import org.junit.Assert;
29+
import org.junit.Test;
30+
import org.slf4j.Logger;
31+
import org.slf4j.LoggerFactory;
32+
33+
public class TestVariantArray {
34+
private static final Logger LOG = LoggerFactory.getLogger(TestVariantArray.class);
35+
private static final String RANDOM_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
36+
37+
/** Random number generator for generating random strings */
38+
private static SecureRandom random = new SecureRandom(new byte[] {1, 2, 3, 4, 5});
39+
40+
private static final ByteBuffer EMPTY_METADATA = ByteBuffer.wrap(new byte[] {0b1});
41+
42+
private static final byte[] VALUE_NULL = new byte[] {primitiveHeader(0)};
43+
private static final byte[] VALUE_BOOL = new byte[] {primitiveHeader(1)};
44+
private static final byte[] VALUE_INT = new byte[] {primitiveHeader(5), (byte) 0xD2, 0x02, (byte) 0x96, 0x49};
45+
private static final byte[] VALUE_STRING =
46+
new byte[] {primitiveHeader(16), 0x07, 0x00, 0x00, 0x00, 'v', 'a', 'r', 'i', 'a', 'n', 't'};
47+
private static final byte[] VALUE_SHORT_STRING = new byte[] {0b101, 'c'};
48+
private static final byte[] VALUE_DATE = new byte[] {0b101100, (byte) 0xE3, 0x4E, 0x00, 0x00};
49+
50+
private void checkType(Variant v, int expectedBasicType, VariantUtil.Type expectedType) {
51+
Assert.assertEquals(expectedBasicType, v.value.get(v.value.position()) & VariantUtil.BASIC_TYPE_MASK);
52+
Assert.assertEquals(expectedType, v.getType());
53+
}
54+
55+
private String randomString(int len) {
56+
StringBuilder sb = new StringBuilder(len);
57+
for (int i = 0; i < len; i++) {
58+
sb.append(RANDOM_CHARS.charAt(random.nextInt(RANDOM_CHARS.length())));
59+
}
60+
return sb.toString();
61+
}
62+
63+
private void testVariant(Variant v, Consumer<Variant> consumer) {
64+
consumer.accept(v);
65+
// Create new Variant with different byte offsets
66+
byte[] newValue = new byte[v.value.capacity() + 50];
67+
byte[] newMetadata = new byte[v.metadata.capacity() + 50];
68+
Arrays.fill(newValue, (byte) 0xFF);
69+
Arrays.fill(newMetadata, (byte) 0xFF);
70+
v.value.position(0);
71+
v.value.get(newValue, 25, v.value.capacity());
72+
v.value.position(0);
73+
v.metadata.position(0);
74+
v.metadata.get(newMetadata, 25, v.metadata.capacity());
75+
v.metadata.position(0);
76+
Variant v2 = new Variant(
77+
ByteBuffer.wrap(newValue, 25, v.value.capacity()),
78+
ByteBuffer.wrap(newMetadata, 25, v.metadata.capacity()));
79+
consumer.accept(v2);
80+
}
81+
82+
private static byte primitiveHeader(int type) {
83+
return (byte) (type << 2);
84+
}
85+
86+
private static int getMinIntegerSize(int value) {
87+
return (value <= 0xFF) ? 1 : (value <= 0xFFFF) ? 2 : (value <= 0xFFFFFF) ? 3 : 4;
88+
}
89+
90+
private static void writeVarlenInt(ByteBuffer buffer, int value, int valueSize) {
91+
if (valueSize == 1) {
92+
buffer.put((byte) value);
93+
} else if (valueSize == 2) {
94+
buffer.putShort((short) value);
95+
} else if (valueSize == 3) {
96+
buffer.put((byte) (value & 0xFF));
97+
buffer.put((byte) ((value >> 8) & 0xFF));
98+
buffer.put((byte) ((value >> 16) & 0xFF));
99+
} else {
100+
buffer.putInt(value);
101+
}
102+
}
103+
104+
private static byte[] constructString(String value) {
105+
return ByteBuffer.allocate(value.length() + 5)
106+
.order(ByteOrder.LITTLE_ENDIAN)
107+
.put(primitiveHeader(16))
108+
.putInt(value.length())
109+
.put(value.getBytes(StandardCharsets.UTF_8))
110+
.array();
111+
}
112+
113+
private static byte[] constructArray(byte[]... elements) {
114+
int dataSize = 0;
115+
for (byte[] element : elements) {
116+
dataSize += element.length;
117+
}
118+
119+
boolean isLarge = elements.length > 0xFF;
120+
int offsetSize = getMinIntegerSize(dataSize);
121+
int headerSize = 1 + (isLarge ? 4 : 1) + (elements.length + 1) * offsetSize;
122+
123+
ByteBuffer output = ByteBuffer.allocate(headerSize + dataSize).order(ByteOrder.LITTLE_ENDIAN);
124+
125+
output.put(VariantUtil.arrayHeader(isLarge, offsetSize));
126+
127+
if (isLarge) {
128+
output.putInt(elements.length);
129+
} else {
130+
output.put((byte) elements.length);
131+
}
132+
133+
int currOffset = 0;
134+
for (int i = 0; i < elements.length; ++i) {
135+
writeVarlenInt(output, currOffset, offsetSize);
136+
currOffset += elements[i].length;
137+
}
138+
writeVarlenInt(output, currOffset, offsetSize);
139+
140+
for (int i = 0; i < elements.length; ++i) {
141+
output.put(elements[i]);
142+
}
143+
output.flip();
144+
return output.array();
145+
}
146+
147+
@Test
148+
public void testEmptyArray() {
149+
Variant value = new Variant(ByteBuffer.wrap(new byte[] {0b0011, 0x00}), EMPTY_METADATA);
150+
testVariant(value, v -> {
151+
checkType(v, VariantUtil.ARRAY, VariantUtil.Type.ARRAY);
152+
Assert.assertEquals(0, v.numArrayElements());
153+
});
154+
}
155+
156+
@Test
157+
public void testEmptyLargeArray() {
158+
Variant value = new Variant(ByteBuffer.wrap(new byte[] {0b10011, 0x00, 0x00, 0x00, 0x00}), EMPTY_METADATA);
159+
testVariant(value, v -> {
160+
checkType(v, VariantUtil.ARRAY, VariantUtil.Type.ARRAY);
161+
Assert.assertEquals(0, v.numArrayElements());
162+
});
163+
}
164+
165+
@Test
166+
public void testLargeArraySize() {
167+
Variant value = new Variant(
168+
ByteBuffer.wrap(new byte[] {0b10011, (byte) 0xFF, (byte) 0x01, 0x00, 0x00}), EMPTY_METADATA);
169+
testVariant(value, v -> {
170+
checkType(v, VariantUtil.ARRAY, VariantUtil.Type.ARRAY);
171+
Assert.assertEquals(511, v.numArrayElements());
172+
});
173+
}
174+
175+
@Test
176+
public void testMixedArray() {
177+
byte[] nested = constructArray(VALUE_INT, VALUE_NULL, VALUE_SHORT_STRING);
178+
Variant value = new Variant(
179+
ByteBuffer.wrap(constructArray(VALUE_DATE, VALUE_BOOL, VALUE_INT, VALUE_STRING, nested)),
180+
EMPTY_METADATA);
181+
182+
testVariant(value, v -> {
183+
checkType(v, VariantUtil.ARRAY, VariantUtil.Type.ARRAY);
184+
Assert.assertEquals(5, v.numArrayElements());
185+
checkType(v.getElementAtIndex(0), VariantUtil.PRIMITIVE, VariantUtil.Type.DATE);
186+
Assert.assertEquals(
187+
LocalDate.parse("2025-04-17"),
188+
LocalDate.ofEpochDay(v.getElementAtIndex(0).getInt()));
189+
checkType(v.getElementAtIndex(1), VariantUtil.PRIMITIVE, VariantUtil.Type.BOOLEAN);
190+
Assert.assertTrue(v.getElementAtIndex(1).getBoolean());
191+
checkType(v.getElementAtIndex(2), VariantUtil.PRIMITIVE, VariantUtil.Type.INT);
192+
Assert.assertEquals(1234567890, v.getElementAtIndex(2).getInt());
193+
checkType(v.getElementAtIndex(3), VariantUtil.PRIMITIVE, VariantUtil.Type.STRING);
194+
Assert.assertEquals("variant", v.getElementAtIndex(3).getString());
195+
checkType(v.getElementAtIndex(4), VariantUtil.ARRAY, VariantUtil.Type.ARRAY);
196+
197+
Variant nestedV = v.getElementAtIndex(4);
198+
Assert.assertEquals(3, nestedV.numArrayElements());
199+
checkType(nestedV.getElementAtIndex(0), VariantUtil.PRIMITIVE, VariantUtil.Type.INT);
200+
Assert.assertEquals(1234567890, nestedV.getElementAtIndex(0).getInt());
201+
checkType(nestedV.getElementAtIndex(1), VariantUtil.PRIMITIVE, VariantUtil.Type.NULL);
202+
checkType(nestedV.getElementAtIndex(2), VariantUtil.SHORT_STR, VariantUtil.Type.STRING);
203+
Assert.assertEquals("c", nestedV.getElementAtIndex(2).getString());
204+
});
205+
}
206+
207+
public void testArrayOffsetSize(String randomString) {
208+
Variant value = new Variant(
209+
ByteBuffer.wrap(constructArray(constructString(randomString), VALUE_BOOL, VALUE_INT)), EMPTY_METADATA);
210+
211+
testVariant(value, v -> {
212+
checkType(v, VariantUtil.ARRAY, VariantUtil.Type.ARRAY);
213+
Assert.assertEquals(3, v.numArrayElements());
214+
checkType(v.getElementAtIndex(0), VariantUtil.PRIMITIVE, VariantUtil.Type.STRING);
215+
Assert.assertEquals(randomString, v.getElementAtIndex(0).getString());
216+
checkType(v.getElementAtIndex(1), VariantUtil.PRIMITIVE, VariantUtil.Type.BOOLEAN);
217+
Assert.assertTrue(v.getElementAtIndex(1).getBoolean());
218+
checkType(v.getElementAtIndex(2), VariantUtil.PRIMITIVE, VariantUtil.Type.INT);
219+
Assert.assertEquals(1234567890, v.getElementAtIndex(2).getInt());
220+
});
221+
}
222+
223+
@Test
224+
public void testArrayTwoByteOffset() {
225+
// a string larger than 255 bytes to push the value offset size above 1 byte
226+
testArrayOffsetSize(randomString(300));
227+
}
228+
229+
@Test
230+
public void testArrayThreeByteOffset() {
231+
// a string larger than 65535 bytes to push the value offset size above 2 bytes
232+
testArrayOffsetSize(randomString(70_000));
233+
}
234+
235+
@Test
236+
public void testArrayFourByteOffset() {
237+
// a string larger than 16777215 bytes to push the value offset size above 3 bytes
238+
testArrayOffsetSize(randomString(16_800_000));
239+
}
240+
}

0 commit comments

Comments
 (0)