Skip to content

Commit b9a5d32

Browse files
committed
GH-561 profile driven review of VariantConverters.
1 parent c1c333d commit b9a5d32

File tree

4 files changed

+38
-15
lines changed

4 files changed

+38
-15
lines changed

parquet-benchmarks/run.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ else
100100
"filter")
101101
BENCHMARK_REGEX="org.apache.parquet.benchmarks.FilteringBenchmarks"
102102
;;
103+
"variant")
104+
BENCHMARK_REGEX="org.apache.parquet.benchmarks.Variant*"
105+
;;
103106
esac
104107

105108
echo JMH command: java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar $BENCHMARK_REGEX $JMH_OPTIONS

parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/VariantProjectionBenchmark.java

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@
8888
* ./parquet-benchmarks/run.sh all org.apache.parquet.benchmarks.VariantProjectionBenchmark \
8989
* -wi 3 -i 5 -f 1 -foe true -rf json -rff target/results.json
9090
* </pre>
91-
* *
91+
*
9292
*/
93-
@Fork(0)
93+
@Fork(1)
9494
@State(Scope.Benchmark)
9595
@Warmup(iterations = 3)
9696
@Measurement(iterations = 5)
@@ -533,19 +533,14 @@ public RowRecord getCurrentRecord() {
533533
* schema and projected schemas are handled correctly.
534534
*/
535535
private static final class MessageConverter extends GroupConverter {
536-
private final int idIndex;
537-
private final int categoryIndex;
538-
private final int nestedIndex;
536+
539537
private final PrimitiveConverter idConverter;
540538
private final PrimitiveConverter categoryConverter;
541539
private final RowVariantGroupConverter variantConverter;
542540
private long id;
543541
private int category;
544542

545543
MessageConverter(MessageType schema, GroupType nestedGroup) {
546-
idIndex = schema.getFieldIndex("id");
547-
categoryIndex = schema.getFieldIndex("category");
548-
nestedIndex = schema.getFieldIndex("nested");
549544
idConverter = new PrimitiveConverter() {
550545
@Override
551546
public void addLong(long value) {
@@ -563,10 +558,16 @@ public void addInt(int value) {
563558

564559
@Override
565560
public Converter getConverter(int fieldIndex) {
566-
if (fieldIndex == idIndex) return idConverter;
567-
if (fieldIndex == categoryIndex) return categoryConverter;
568-
if (fieldIndex == nestedIndex) return variantConverter;
569-
throw new IllegalArgumentException("Unknown field index: " + fieldIndex);
561+
switch (fieldIndex) {
562+
case 0:
563+
return idConverter;
564+
case 1:
565+
return categoryConverter;
566+
case 2:
567+
return variantConverter;
568+
default:
569+
throw new IllegalArgumentException("Unknown field index: " + fieldIndex);
570+
}
570571
}
571572

572573
@Override

parquet-variant/src/main/java/org/apache/parquet/variant/VariantBuilder.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.ArrayList;
2525
import java.util.Collections;
2626
import java.util.Set;
27+
import org.apache.parquet.io.api.Binary;
2728

2829
/**
2930
* Builder for creating Variant value and metadata.
@@ -109,7 +110,14 @@ public void appendEncodedValue(ByteBuffer value) {
109110
*/
110111
public void appendString(String str) {
111112
onAppend();
112-
byte[] data = str.getBytes(StandardCharsets.UTF_8);
113+
writeUTF8bytes(str.getBytes(StandardCharsets.UTF_8));
114+
}
115+
116+
/**
117+
* Write bytes as a UTF8 string.
118+
* @param data data to write; this is not modified.
119+
*/
120+
private void writeUTF8bytes(final byte[] data) {
113121
boolean longStr = data.length > VariantUtil.MAX_SHORT_STR_SIZE;
114122
checkCapacity((longStr ? 1 + VariantUtil.U32_SIZE : 1) + data.length);
115123
if (longStr) {
@@ -125,6 +133,17 @@ public void appendString(String str) {
125133
writePos += data.length;
126134
}
127135

136+
/**
137+
* Given a Binary, append it to the variant as a string.
138+
* When unmarshalling from partially shredded types, this saves string creation.
139+
* This is not for use by the VariantConverters.
140+
* @param binary source data.
141+
*/
142+
void appendAsString(Binary binary) {
143+
onAppend();
144+
writeUTF8bytes(binary.getBytesUnsafe());
145+
}
146+
128147
/**
129148
* Appends a null value to the Variant builder.
130149
*/

parquet-variant/src/main/java/org/apache/parquet/variant/VariantConverters.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,14 +233,14 @@ static class PartiallyShreddedFieldsConverter extends GroupConverter {
233233
PartiallyShreddedFieldsConverter(GroupType fieldsType, ParentConverter<VariantBuilder> parent) {
234234
this.converters = new Converter[fieldsType.getFieldCount()];
235235
this.parent = parent;
236+
ParentConverter<VariantObjectBuilder> newParent = converter -> converter.accept(objectBuilder);
236237

237238
for (int index = 0; index < fieldsType.getFieldCount(); index += 1) {
238239
Type field = fieldsType.getType(index);
239240
Preconditions.checkArgument(!field.isPrimitive(), "Invalid field group: " + field);
240241

241242
String name = field.getName();
242243
shreddedFieldNames.add(name);
243-
ParentConverter<VariantObjectBuilder> newParent = converter -> converter.accept(objectBuilder);
244244
converters[index] = new FieldValueConverter(name, field.asGroupType(), newParent);
245245
}
246246
}
@@ -501,7 +501,7 @@ static class VariantStringConverter extends ShreddedScalarConverter {
501501

502502
@Override
503503
public void addBinary(Binary value) {
504-
parent.build(builder -> builder.appendString(value.toStringUsingUTF8()));
504+
parent.build(builder -> builder.appendAsString(value));
505505
}
506506
}
507507

0 commit comments

Comments
 (0)