Skip to content

Commit c34123f

Browse files
0x26resaandres3
andauthored
GH-3112: Fix reading of proto Uint32Value (#3113)
* Fix reading of proto Uint32Value * Add test for overflow * Apply spotless * Roll back change to original test * Better uint32 example --------- Co-authored-by: aandres3 <aandres@tradewelltech.co>
1 parent 53d7842 commit c34123f

6 files changed

Lines changed: 72 additions & 19 deletions

File tree

parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,12 @@ public ProtoUInt32ValueConverter(ParentValueContainer parent) {
763763
this.parent = parent;
764764
}
765765

766+
@Override
767+
public void addInt(int value) {
768+
parent.add(UInt32Value.of(value));
769+
}
770+
771+
// This is left for backward compatibility with the old implementation which used int64 for uint32
766772
@Override
767773
public void addLong(long value) {
768774
parent.add(UInt32Value.of(Math.toIntExact(value)));

parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ private <T> Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> addF
260260
return builder.primitive(INT32, getRepetition(descriptor));
261261
}
262262
if (messageType.equals(UInt32Value.getDescriptor())) {
263-
return builder.primitive(INT64, getRepetition(descriptor));
263+
return builder.primitive(INT32, getRepetition(descriptor));
264264
}
265265
if (messageType.equals(BytesValue.getDescriptor())) {
266266
return builder.primitive(BINARY, getRepetition(descriptor));

parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,7 @@ void writeRawValue(Object value) {
766766
class UInt32ValueWriter extends FieldWriter {
767767
@Override
768768
void writeRawValue(Object value) {
769-
recordConsumer.addLong(((UInt32Value) value).getValue());
769+
recordConsumer.addInteger(((UInt32Value) value).getValue());
770770
}
771771
}
772772

parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,34 @@ public void testProto3WrappedMessageClass() throws Exception {
678678
assertEquals(msgNonEmpty, result.get(1));
679679
}
680680

681+
@Test
682+
public void testProto3Uint32Behaviour() throws Exception {
683+
684+
TestProto3.SchemaConverterAllDatatypes intMin = TestProto3.SchemaConverterAllDatatypes.newBuilder()
685+
.setOptionalUInt32(Integer.MIN_VALUE)
686+
.build();
687+
assertEquals(intMin.toString(), "optionalUInt32: 2147483648\n");
688+
TestProto3.SchemaConverterAllDatatypes uintMin = TestProto3.SchemaConverterAllDatatypes.newBuilder()
689+
.setOptionalUInt32(-1)
690+
.build();
691+
assertEquals(uintMin.toString(), "optionalUInt32: 4294967295\n");
692+
TestProto3.SchemaConverterAllDatatypes uintMax = TestProto3.SchemaConverterAllDatatypes.newBuilder()
693+
.setOptionalUInt32(Integer.MAX_VALUE)
694+
.build();
695+
assertEquals(uintMax.toString(), "optionalUInt32: 2147483647\n");
696+
697+
Configuration conf = new Configuration();
698+
Path outputPath = new WriteUsingMR(conf).write(intMin, uintMin, uintMax);
699+
ReadUsingMR readUsingMR = new ReadUsingMR(conf);
700+
String customClass = TestProto3.SchemaConverterAllDatatypes.class.getName();
701+
ProtoReadSupport.setProtobufClass(readUsingMR.getConfiguration(), customClass);
702+
List<Message> result = readUsingMR.read(outputPath);
703+
704+
assertEquals(result.get(0), intMin);
705+
assertEquals(result.get(1), uintMin);
706+
assertEquals(result.get(2), uintMax);
707+
}
708+
681709
/**
682710
* Runs job that writes input to file and then job reading data back.
683711
*/

parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ public void testProto3ConvertWrappedMessageUnwrapped() throws Exception {
400400
+ " optional int64 wrappedInt64 = 3;\n"
401401
+ " optional int64 wrappedUInt64 = 4;\n"
402402
+ " optional int32 wrappedInt32 = 5;\n"
403-
+ " optional int64 wrappedUInt32 = 6;\n"
403+
+ " optional int32 wrappedUInt32 = 6;\n"
404404
+ " optional boolean wrappedBool = 7;\n"
405405
+ " optional binary wrappedString (UTF8) = 8;\n"
406406
+ " optional binary wrappedBytes = 9;\n"

parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,7 @@
2222
import static org.junit.Assert.assertFalse;
2323
import static org.junit.Assert.assertTrue;
2424

25-
import com.google.protobuf.BoolValue;
26-
import com.google.protobuf.ByteString;
27-
import com.google.protobuf.BytesValue;
28-
import com.google.protobuf.Descriptors;
29-
import com.google.protobuf.DoubleValue;
30-
import com.google.protobuf.DynamicMessage;
31-
import com.google.protobuf.FloatValue;
32-
import com.google.protobuf.Int32Value;
33-
import com.google.protobuf.Int64Value;
34-
import com.google.protobuf.Message;
35-
import com.google.protobuf.MessageOrBuilder;
36-
import com.google.protobuf.StringValue;
37-
import com.google.protobuf.Timestamp;
38-
import com.google.protobuf.UInt32Value;
39-
import com.google.protobuf.UInt64Value;
40-
import com.google.protobuf.Value;
25+
import com.google.protobuf.*;
4126
import com.google.protobuf.util.Timestamps;
4227
import java.io.IOException;
4328
import java.time.LocalDate;
@@ -1372,6 +1357,40 @@ public void testProto3WrappedMessageUnwrappedRoundTrip() throws Exception {
13721357
gotBackFirst.getWrappedBytes().getValue());
13731358
}
13741359

1360+
@Test
1361+
public void testProto3WrappedMessageUnwrappedRoundTripUint32() throws Exception {
1362+
1363+
TestProto3.WrappedMessage msgMin = TestProto3.WrappedMessage.newBuilder()
1364+
.setWrappedUInt32(UInt32Value.of(Integer.MIN_VALUE))
1365+
.build();
1366+
assertEquals(TextFormat.shortDebugString(msgMin), "wrappedUInt32 { value: 2147483648 }");
1367+
1368+
TestProto3.WrappedMessage msgMax = TestProto3.WrappedMessage.newBuilder()
1369+
.setWrappedUInt32(UInt32Value.of(Integer.MAX_VALUE))
1370+
.build();
1371+
assertEquals(TextFormat.shortDebugString(msgMax), "wrappedUInt32 { value: 2147483647 }");
1372+
1373+
TestProto3.WrappedMessage msgMinusOne = TestProto3.WrappedMessage.newBuilder()
1374+
.setWrappedUInt32(UInt32Value.of(-1))
1375+
.build();
1376+
assertEquals(TextFormat.shortDebugString(msgMinusOne), "wrappedUInt32 { value: 4294967295 }");
1377+
1378+
Path tmpFilePath = TestUtils.someTemporaryFilePath();
1379+
ParquetWriter<MessageOrBuilder> writer = ProtoParquetWriter.<MessageOrBuilder>builder(tmpFilePath)
1380+
.withMessage(TestProto3.WrappedMessage.class)
1381+
.config(ProtoWriteSupport.PB_UNWRAP_PROTO_WRAPPERS, "true")
1382+
.build();
1383+
writer.write(msgMin);
1384+
writer.write(msgMax);
1385+
writer.write(msgMinusOne);
1386+
writer.close();
1387+
List<TestProto3.WrappedMessage> gotBack = TestUtils.readMessages(tmpFilePath, TestProto3.WrappedMessage.class);
1388+
1389+
assertEquals(msgMin, gotBack.get(0));
1390+
assertEquals(msgMax, gotBack.get(1));
1391+
assertEquals(msgMinusOne, gotBack.get(2));
1392+
}
1393+
13751394
@Test
13761395
public void testProto3WrappedMessageWithNullsRoundTrip() throws Exception {
13771396
TestProto3.WrappedMessage.Builder msg = TestProto3.WrappedMessage.newBuilder();

0 commit comments

Comments
 (0)