Skip to content

Commit 8d6afa9

Browse files
authored
PARQUET-2417: Add geometry and geography logical type annotations (#3200)
1 parent 236ddb9 commit 8d6afa9

10 files changed

Lines changed: 571 additions & 0 deletions

File tree

parquet-column/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@
7676
<version>${slf4j.version}</version>
7777
</dependency>
7878

79+
<dependency>
80+
<groupId>org.locationtech.jts</groupId>
81+
<artifactId>jts-core</artifactId>
82+
<version>${jts.version}</version>
83+
</dependency>
84+
7985
<dependency>
8086
<groupId>com.carrotsearch</groupId>
8187
<artifactId>junit-benchmarks</artifactId>
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.parquet.column.schema;
20+
21+
/**
 * Edge interpolation algorithm for the Geography logical type.
 *
 * <p>Every constant carries an integer code that is exposed through {@link #getValue()} and
 * mapped back to the constant by {@link #findByValue(int)}.
 */
public enum EdgeInterpolationAlgorithm {
  SPHERICAL(0),
  VINCENTY(1),
  THOMAS(2),
  ANDOYER(3),
  KARNEY(4);

  private final int value;

  EdgeInterpolationAlgorithm(int value) {
    this.value = value;
  }

  /**
   * Get the integer value of this enum value, as defined in the Thrift IDL.
   *
   * @return the integer code attached to this constant
   */
  public int getValue() {
    return value;
  }

  /**
   * Find the enum type by its integer value, as defined in the Thrift IDL.
   *
   * @param value the integer code to look up
   * @return the constant whose code equals {@code value}; never {@code null}
   * @throws IllegalArgumentException if no constant is declared for {@code value}
   */
  public static EdgeInterpolationAlgorithm findByValue(int value) {
    // Search the declared constants so the mapping cannot drift from the codes above.
    for (EdgeInterpolationAlgorithm algorithm : values()) {
      if (algorithm.value == value) {
        return algorithm;
      }
    }
    throw new IllegalArgumentException("Unrecognized EdgeInterpolationAlgorithm value: " + value);
  }
}

parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,13 @@
4141
import java.util.Set;
4242
import java.util.function.Supplier;
4343
import org.apache.parquet.Preconditions;
44+
import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
4445

4546
public abstract class LogicalTypeAnnotation {
47+
48+
/**
 * CRS identifier that {@code GeographyLogicalTypeAnnotation} prints in its type parameters when an
 * edge algorithm is set but no CRS was supplied.
 */
public static final String DEFAULT_CRS = "OGC:CRS84";
49+
/**
 * Edge interpolation algorithm that {@code GeographyLogicalTypeAnnotation} prints in its type
 * parameters when a CRS is set but no algorithm was supplied.
 */
public static final EdgeInterpolationAlgorithm DEFAULT_ALGO = EdgeInterpolationAlgorithm.SPHERICAL;
50+
4651
enum LogicalTypeToken {
4752
MAP {
4853
@Override
@@ -155,6 +160,31 @@ protected LogicalTypeAnnotation fromString(List<String> params) {
155160
return float16Type();
156161
}
157162
},
163+
GEOMETRY {
164+
@Override
165+
protected LogicalTypeAnnotation fromString(List<String> params) {
166+
if (params.size() > 1) {
167+
throw new RuntimeException(
168+
"Expecting at most 1 parameter for geometry logical type, got " + params.size());
169+
}
170+
String crs = params.isEmpty() ? null : params.get(0);
171+
return geometryType(crs);
172+
}
173+
},
174+
GEOGRAPHY {
175+
@Override
176+
protected LogicalTypeAnnotation fromString(List<String> params) {
177+
if (params.size() > 2) {
178+
throw new RuntimeException(
179+
"Expecting at most 2 parameters for geography logical type (crs and edge algorithm), got "
180+
+ params.size());
181+
}
182+
String crs = !params.isEmpty() ? params.get(0) : null;
183+
EdgeInterpolationAlgorithm algo =
184+
params.size() > 1 ? EdgeInterpolationAlgorithm.valueOf(params.get(1)) : null;
185+
return geographyType(crs, algo);
186+
}
187+
},
158188
UNKNOWN {
159189
@Override
160190
protected LogicalTypeAnnotation fromString(List<String> params) {
@@ -334,6 +364,18 @@ public static Float16LogicalTypeAnnotation float16Type() {
334364
return Float16LogicalTypeAnnotation.INSTANCE;
335365
}
336366

367+
public static GeometryLogicalTypeAnnotation geometryType(String crs) {
368+
return new GeometryLogicalTypeAnnotation(crs);
369+
}
370+
371+
public static GeographyLogicalTypeAnnotation geographyType(String crs, EdgeInterpolationAlgorithm edgeAlgorithm) {
372+
return new GeographyLogicalTypeAnnotation(crs, edgeAlgorithm);
373+
}
374+
375+
public static GeographyLogicalTypeAnnotation geographyType() {
376+
return new GeographyLogicalTypeAnnotation(null, null);
377+
}
378+
337379
/**
 * @return the shared {@code UnknownLogicalTypeAnnotation.INSTANCE}
 */
public static UnknownLogicalTypeAnnotation unknownType() {
338380
return UnknownLogicalTypeAnnotation.INSTANCE;
339381
}
@@ -1183,6 +1225,124 @@ public boolean equals(Object obj) {
11831225
}
11841226
}
11851227

1228+
public static class GeometryLogicalTypeAnnotation extends LogicalTypeAnnotation {
1229+
private final String crs;
1230+
1231+
private GeometryLogicalTypeAnnotation(String crs) {
1232+
this.crs = crs;
1233+
}
1234+
1235+
@Override
1236+
@Deprecated
1237+
public OriginalType toOriginalType() {
1238+
return null;
1239+
}
1240+
1241+
@Override
1242+
public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> logicalTypeAnnotationVisitor) {
1243+
return logicalTypeAnnotationVisitor.visit(this);
1244+
}
1245+
1246+
@Override
1247+
LogicalTypeToken getType() {
1248+
return LogicalTypeToken.GEOMETRY;
1249+
}
1250+
1251+
@Override
1252+
protected String typeParametersAsString() {
1253+
if (crs == null || crs.isEmpty()) {
1254+
return "";
1255+
}
1256+
return String.format("(%s)", crs);
1257+
}
1258+
1259+
public String getCrs() {
1260+
return crs;
1261+
}
1262+
1263+
@Override
1264+
public boolean equals(Object obj) {
1265+
if (!(obj instanceof GeometryLogicalTypeAnnotation)) {
1266+
return false;
1267+
}
1268+
GeometryLogicalTypeAnnotation other = (GeometryLogicalTypeAnnotation) obj;
1269+
return Objects.equals(crs, other.crs);
1270+
}
1271+
1272+
@Override
1273+
public int hashCode() {
1274+
return Objects.hash(crs);
1275+
}
1276+
1277+
@Override
1278+
PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
1279+
return PrimitiveStringifier.WKB_STRINGIFIER;
1280+
}
1281+
}
1282+
1283+
public static class GeographyLogicalTypeAnnotation extends LogicalTypeAnnotation {
1284+
private final String crs;
1285+
private final EdgeInterpolationAlgorithm algorithm;
1286+
1287+
private GeographyLogicalTypeAnnotation(String crs, EdgeInterpolationAlgorithm algorithm) {
1288+
this.crs = crs;
1289+
this.algorithm = algorithm;
1290+
}
1291+
1292+
@Override
1293+
@Deprecated
1294+
public OriginalType toOriginalType() {
1295+
return null;
1296+
}
1297+
1298+
@Override
1299+
public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> logicalTypeAnnotationVisitor) {
1300+
return logicalTypeAnnotationVisitor.visit(this);
1301+
}
1302+
1303+
@Override
1304+
LogicalTypeToken getType() {
1305+
return LogicalTypeToken.GEOGRAPHY;
1306+
}
1307+
1308+
@Override
1309+
protected String typeParametersAsString() {
1310+
boolean hasCrs = crs != null && !crs.isEmpty();
1311+
boolean hasAlgo = algorithm != null;
1312+
if (!hasCrs && !hasAlgo) {
1313+
return "";
1314+
}
1315+
return String.format("(%s,%s)", hasCrs ? crs : DEFAULT_CRS, hasAlgo ? algorithm : DEFAULT_ALGO);
1316+
}
1317+
1318+
public String getCrs() {
1319+
return crs;
1320+
}
1321+
1322+
public EdgeInterpolationAlgorithm getAlgorithm() {
1323+
return algorithm;
1324+
}
1325+
1326+
@Override
1327+
public boolean equals(Object obj) {
1328+
if (!(obj instanceof GeographyLogicalTypeAnnotation)) {
1329+
return false;
1330+
}
1331+
GeographyLogicalTypeAnnotation other = (GeographyLogicalTypeAnnotation) obj;
1332+
return Objects.equals(crs, other.crs) && Objects.equals(algorithm, other.algorithm);
1333+
}
1334+
1335+
@Override
1336+
public int hashCode() {
1337+
return Objects.hash(crs, algorithm);
1338+
}
1339+
1340+
@Override
1341+
PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
1342+
return PrimitiveStringifier.WKB_STRINGIFIER;
1343+
}
1344+
}
1345+
11861346
/**
11871347
* Implement this interface to visit a logical type annotation in the schema.
11881348
* The default implementation for each logical type specific visitor method is empty.
@@ -1259,6 +1419,14 @@ default Optional<T> visit(Float16LogicalTypeAnnotation float16LogicalType) {
12591419
return empty();
12601420
}
12611421

1422+
/**
 * Default handler for geometry annotations; returns {@code empty()} unless overridden.
 */
default Optional<T> visit(GeometryLogicalTypeAnnotation geometryLogicalType) {
1423+
return empty();
1424+
}
1425+
1426+
/**
 * Default handler for geography annotations; returns {@code empty()} unless overridden.
 */
default Optional<T> visit(GeographyLogicalTypeAnnotation geographyLogicalType) {
1427+
return empty();
1428+
}
1429+
12621430
/**
 * Default handler for unknown annotations; returns {@code empty()} unless overridden.
 */
default Optional<T> visit(UnknownLogicalTypeAnnotation unknownLogicalTypeAnnotation) {
12631431
return empty();
12641432
}

parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
import java.util.concurrent.TimeUnit;
3636
import javax.naming.OperationNotSupportedException;
3737
import org.apache.parquet.io.api.Binary;
38+
import org.locationtech.jts.geom.Geometry;
39+
import org.locationtech.jts.io.ParseException;
40+
import org.locationtech.jts.io.WKBReader;
3841

3942
/**
4043
* Class that provides string representations for the primitive values. These string values are to be used for
@@ -442,6 +445,21 @@ private void appendHex(byte[] array, int offset, int length, StringBuilder build
442445
}
443446
};
444447

448+
static final PrimitiveStringifier WKB_STRINGIFIER = new BinaryStringifierBase("WKB_STRINGIFIER") {
449+
450+
@Override
451+
String stringifyNotNull(Binary value) {
452+
453+
try {
454+
WKBReader reader = new WKBReader();
455+
Geometry geometry = reader.read(value.getBytesUnsafe());
456+
return geometry.toText();
457+
} catch (ParseException e) {
458+
return BINARY_INVALID;
459+
}
460+
}
461+
};
462+
445463
static final PrimitiveStringifier FLOAT16_STRINGIFIER = new BinaryStringifierBase("FLOAT16_STRINGIFIER") {
446464

447465
@Override

parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,18 @@ public Optional<PrimitiveComparator> visit(
271271
LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
272272
return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
273273
}
274+
275+
// Geometry-annotated values are ordered with the unsigned lexicographical binary comparator.
@Override
276+
public Optional<PrimitiveComparator> visit(
277+
LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
278+
return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
279+
}
280+
281+
// Geography-annotated values are ordered with the unsigned lexicographical binary comparator.
@Override
282+
public Optional<PrimitiveComparator> visit(
283+
LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) {
284+
return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
285+
}
274286
})
275287
.orElseThrow(() -> new ShouldNeverHappenException(
276288
"No comparator logic implemented for BINARY logical type: " + logicalType));

parquet-column/src/main/java/org/apache/parquet/schema/Types.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,18 @@ public Optional<Boolean> visit(
577577
return checkBinaryPrimitiveType(enumLogicalType);
578578
}
579579

580+
// Geometry annotations are validated by checkBinaryPrimitiveType
// (presumably requires a BINARY primitive; confirm against that helper).
@Override
581+
public Optional<Boolean> visit(
582+
LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryLogicalType) {
583+
return checkBinaryPrimitiveType(geometryLogicalType);
584+
}
585+
586+
// Geography annotations are validated by checkBinaryPrimitiveType
// (presumably requires a BINARY primitive; confirm against that helper).
@Override
587+
public Optional<Boolean> visit(
588+
LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyLogicalType) {
589+
return checkBinaryPrimitiveType(geographyLogicalType);
590+
}
591+
580592
private Optional<Boolean> checkFixedPrimitiveType(
581593
int l, LogicalTypeAnnotation logicalTypeAnnotation) {
582594
Preconditions.checkState(

0 commit comments

Comments
 (0)