Skip to content

Commit 1274733

Browse files
committed
[GH-3414] Add parseJson to VariantBuilder for JSON-to-Variant conversion
1 parent 6e2f7bb commit 1274733

File tree

3 files changed

+472
-0
lines changed

3 files changed

+472
-0
lines changed

parquet-variant/pom.xml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,17 @@
4646
<artifactId>parquet-column</artifactId>
4747
<version>${project.version}</version>
4848
</dependency>
49+
<dependency>
50+
<groupId>${jackson.groupId}</groupId>
51+
<artifactId>jackson-core</artifactId>
52+
<version>${jackson.version}</version>
53+
</dependency>
54+
<dependency>
55+
<groupId>org.apache.parquet</groupId>
56+
<artifactId>parquet-jackson</artifactId>
57+
<version>${project.version}</version>
58+
<scope>runtime</scope>
59+
</dependency>
4960
<dependency>
5061
<groupId>com.google.guava</groupId>
5162
<artifactId>guava</artifactId>

parquet-variant/src/main/java/org/apache/parquet/variant/VariantBuilder.java

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
*/
1717
package org.apache.parquet.variant;
1818

19+
import com.fasterxml.jackson.core.JsonFactory;
20+
import com.fasterxml.jackson.core.JsonParseException;
21+
import com.fasterxml.jackson.core.JsonParser;
22+
import com.fasterxml.jackson.core.JsonToken;
23+
import com.fasterxml.jackson.core.exc.InputCoercionException;
24+
import java.io.IOException;
1925
import java.math.BigDecimal;
2026
import java.math.BigInteger;
2127
import java.nio.ByteBuffer;
@@ -30,6 +36,8 @@
3036
*/
3137
public class VariantBuilder {
3238

39+
// Single parser factory shared by every parseJson(String) call instead of allocating one per
// call. Per Jackson's documentation, a JsonFactory is thread-safe once configured and is meant
// to be reused.
private static final JsonFactory JSON_FACTORY = new JsonFactory();
40+
3341
/**
3442
* The buffer for building the Variant value. The first `writePos` bytes have been written.
3543
*/
@@ -65,6 +73,152 @@ public VariantBuilder(Metadata metadata) {
6573
this.metadata = metadata;
6674
}
6775

76+
/**
77+
* Parses a JSON string and returns the corresponding {@link Variant}.
78+
*
79+
* <p>Uses Jackson streaming parser for single-pass conversion
80+
* with no intermediate tree. Number handling preserves precision:
81+
* integers use the smallest fitting type, floating-point numbers
82+
* prefer decimal encoding (no scientific notation) and fall back
83+
* to double.
84+
*
85+
* <p>Ported from Apache Spark's {@code VariantBuilder.parseJson}.
86+
*
87+
* @param json the JSON string to parse
88+
* @return the parsed Variant
89+
* @throws IOException if the JSON is malformed or an I/O error occurs
90+
*/
91+
public static Variant parseJson(String json) throws IOException {
92+
try (JsonParser parser = JSON_FACTORY.createParser(json)) {
93+
parser.nextToken();
94+
return parseJson(parser);
95+
}
96+
}
97+
98+
/**
99+
* Parses a JSON value from an already-positioned {@link JsonParser}
100+
* and returns the corresponding {@link Variant}. The parser must
101+
* have its current token set (i.e., {@code parser.nextToken()}
102+
* or equivalent must have been called).
103+
*
104+
* @param parser a positioned Jackson JsonParser
105+
* @return the parsed Variant
106+
* @throws IOException if the JSON is malformed or an I/O error occurs
107+
*/
108+
public static Variant parseJson(JsonParser parser) throws IOException {
109+
VariantBuilder builder = new VariantBuilder();
110+
buildJson(builder, parser);
111+
return builder.build();
112+
}
113+
114+
/**
115+
* Recursively builds a Variant value from the current position of a
116+
* Jackson streaming parser. Handles objects, arrays, strings, numbers
117+
* (int/long/decimal/double), booleans, and null.
118+
*/
119+
private static void buildJson(VariantBuilder builder, JsonParser parser) throws IOException {
120+
JsonToken token = parser.currentToken();
121+
if (token == null) {
122+
throw new JsonParseException(parser, "Unexpected null token");
123+
}
124+
switch (token) {
125+
case START_OBJECT:
126+
buildJsonObject(builder, parser);
127+
break;
128+
case START_ARRAY:
129+
buildJsonArray(builder, parser);
130+
break;
131+
case VALUE_STRING:
132+
builder.appendString(parser.getText());
133+
break;
134+
case VALUE_NUMBER_INT:
135+
buildJsonInteger(builder, parser);
136+
break;
137+
case VALUE_NUMBER_FLOAT:
138+
buildJsonFloat(builder, parser);
139+
break;
140+
case VALUE_TRUE:
141+
builder.appendBoolean(true);
142+
break;
143+
case VALUE_FALSE:
144+
builder.appendBoolean(false);
145+
break;
146+
case VALUE_NULL:
147+
builder.appendNull();
148+
break;
149+
default:
150+
throw new JsonParseException(parser, "Unexpected token " + token);
151+
}
152+
}
153+
154+
private static void buildJsonObject(VariantBuilder builder, JsonParser parser) throws IOException {
155+
VariantObjectBuilder obj = builder.startObject();
156+
while (parser.nextToken() != JsonToken.END_OBJECT) {
157+
obj.appendKey(parser.currentName());
158+
parser.nextToken();
159+
buildJson(obj, parser);
160+
}
161+
builder.endObject();
162+
}
163+
164+
private static void buildJsonArray(VariantBuilder builder, JsonParser parser) throws IOException {
165+
VariantArrayBuilder arr = builder.startArray();
166+
while (parser.nextToken() != JsonToken.END_ARRAY) {
167+
buildJson(arr, parser);
168+
}
169+
builder.endArray();
170+
}
171+
172+
private static void buildJsonInteger(VariantBuilder builder, JsonParser parser) throws IOException {
173+
try {
174+
appendSmallestLong(builder, parser.getLongValue());
175+
} catch (InputCoercionException ignored) {
176+
buildJsonFloat(builder, parser);
177+
}
178+
}
179+
180+
private static void buildJsonFloat(VariantBuilder builder, JsonParser parser) throws IOException {
181+
if (!tryAppendDecimal(builder, parser.getText())) {
182+
builder.appendDouble(parser.getDoubleValue());
183+
}
184+
}
185+
186+
/**
187+
* Appends a long value using the smallest integer type that fits.
188+
*/
189+
private static void appendSmallestLong(VariantBuilder builder, long l) {
190+
if (l == (byte) l) {
191+
builder.appendByte((byte) l);
192+
} else if (l == (short) l) {
193+
builder.appendShort((short) l);
194+
} else if (l == (int) l) {
195+
builder.appendInt((int) l);
196+
} else {
197+
builder.appendLong(l);
198+
}
199+
}
200+
201+
/**
202+
* Tries to parse a number string as a decimal. Only accepts plain
203+
* decimal format (digits, minus, dot -- no scientific notation).
204+
* Returns true if the number was successfully appended as a decimal.
205+
* Ported from Spark's {@code tryParseDecimal}.
206+
*/
207+
private static boolean tryAppendDecimal(VariantBuilder builder, String input) {
208+
for (int i = 0; i < input.length(); i++) {
209+
char ch = input.charAt(i);
210+
if (ch != '-' && ch != '.' && !(ch >= '0' && ch <= '9')) {
211+
return false;
212+
}
213+
}
214+
BigDecimal d = new BigDecimal(input);
215+
if (d.scale() <= VariantUtil.MAX_DECIMAL16_PRECISION && d.precision() <= VariantUtil.MAX_DECIMAL16_PRECISION) {
216+
builder.appendDecimal(d);
217+
return true;
218+
}
219+
return false;
220+
}
221+
68222
/**
69223
* @return the Variant value
70224
*/

0 commit comments

Comments
 (0)