Skip to content

Commit e721516

Browse files
aleczoellerAlec Zoellerjiayuasu
authored
[SEDONA-2474] New Java Spark Example (#2475)
Co-authored-by: Alec Zoeller <a.zoeller@gns.cri.nz> Co-authored-by: Jia Yu <jiayu@apache.org>
1 parent 4bc6f30 commit e721516

10 files changed

Lines changed: 547 additions & 0 deletions

File tree

examples/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ The folder structure of this repository is as follows.
2929

3030
* spark-sql: a Scala template shows how to use Sedona RDD, DataFrame and SQL API
3131
* flink-sql: a Java template show how to use Sedona SQL via Flink Table APIs
32+
* java-spark-sql: a pure Java template implementation and use case for Sedona RDD, DataFrame and SQL API
3233

3334
## Compile and package
3435

@@ -59,6 +60,8 @@ To run the jar in this way, you need to:
5960

6061
We highly suggest you use IDEs to run template projects on your local machine. For Scala, we recommend IntelliJ IDEA with Scala plug-in. For Java, we recommend IntelliJ IDEA and Eclipse. With the help of IDEs, **you don't have to prepare anything** (even don't need to download and set up Spark!). As long as you have Scala and Java, everything works properly!
6162

63+
* In the case of the java-spark-sql template, execute `mvn test` to run JUnit tests using sample GeoParquet file.
64+
6265
### Scala
6366

6467
Import the Scala template project as SBT project. Then run the Main file in this project.

examples/java-spark-sql/.gitignore

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
/.idea/
19+
/target/
20+
dependency-reduced-pom.xml

examples/java-spark-sql/pom.xml

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
<!--
2+
~ Licensed to the Apache Software Foundation (ASF) under one
3+
~ or more contributor license agreements. See the NOTICE file
4+
~ distributed with this work for additional information
5+
~ regarding copyright ownership. The ASF licenses this file
6+
~ to you under the Apache License, Version 2.0 (the
7+
~ "License"); you may not use this file except in compliance
8+
~ with the License. You may obtain a copy of the License at
9+
~
10+
~ http://www.apache.org/licenses/LICENSE-2.0
11+
~
12+
~ Unless required by applicable law or agreed to in writing,
13+
~ software distributed under the License is distributed on an
14+
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
~ KIND, either express or implied. See the License for the
16+
~ specific language governing permissions and limitations
17+
~ under the License.
18+
-->
19+
20+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
21+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
22+
<modelVersion>4.0.0</modelVersion>
23+
24+
<groupId>org.apache.sedona</groupId>
25+
<artifactId>sedona-java-spark-example</artifactId>
26+
<version>1.6.1</version>
27+
<name>Sedona : Examples : Java Spark SQL</name>
28+
<description>Example project for Apache Sedona with Java and Spark.</description>
29+
30+
<properties>
31+
<!-- Set spark.scope to "compile" to be able to run locally with java -jar shaded.jar -->
32+
<spark.scope>provided</spark.scope>
33+
<javax.scope>test</javax.scope>
34+
35+
<sedona.version>1.6.1</sedona.version>
36+
<geotools.version>1.8.0-33.1</geotools.version>
37+
<spark.version>3.5.7</spark.version>
38+
<javax.servlet.version>4.0.1</javax.servlet.version>
39+
<spotless.version>3.0.0</spotless.version>
40+
</properties>
41+
42+
<dependencies>
43+
<dependency>
44+
<groupId>org.datasyslab</groupId>
45+
<artifactId>geotools-wrapper</artifactId>
46+
<version>${geotools.version}</version>
47+
</dependency>
48+
<dependency>
49+
<groupId>org.apache.sedona</groupId>
50+
<artifactId>sedona-spark-shaded-3.5_2.13</artifactId>
51+
<version>${sedona.version}</version>
52+
</dependency>
53+
<dependency>
54+
<groupId>org.apache.spark</groupId>
55+
<artifactId>spark-core_2.13</artifactId>
56+
<version>${spark.version}</version>
57+
</dependency>
58+
<dependency>
59+
<groupId>org.apache.spark</groupId>
60+
<artifactId>spark-sql_2.13</artifactId>
61+
<version>${spark.version}</version>
62+
<scope>${spark.scope}</scope>
63+
</dependency>
64+
<dependency>
65+
<groupId>javax.servlet</groupId>
66+
<artifactId>javax.servlet-api</artifactId>
67+
<version>${javax.servlet.version}</version>
68+
<scope>${javax.scope}</scope>
69+
</dependency>
70+
<dependency>
71+
<groupId>org.junit.jupiter</groupId>
72+
<artifactId>junit-jupiter-engine</artifactId>
73+
<version>5.2.0-M1</version>
74+
</dependency>
75+
76+
</dependencies>
77+
<build>
78+
<plugins>
79+
<plugin>
80+
<groupId>org.apache.maven.plugins</groupId>
81+
<artifactId>maven-surefire-plugin</artifactId>
82+
<version>3.2.5</version>
83+
<configuration>
84+
<argLine>
85+
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
86+
--add-opens=java.base/java.nio=ALL-UNNAMED
87+
--add-opens=java.base/java.lang=ALL-UNNAMED
88+
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED
89+
--add-opens=java.base/java.util=ALL-UNNAMED
90+
</argLine>
91+
</configuration>
92+
</plugin>
93+
<plugin>
94+
<groupId>org.apache.maven.plugins</groupId>
95+
<artifactId>maven-shade-plugin</artifactId>
96+
<version>2.1</version>
97+
<executions>
98+
<execution>
99+
<phase>package</phase>
100+
<goals>
101+
<goal>shade</goal>
102+
</goals>
103+
<configuration>
104+
<transformers>
105+
<!-- use transformer to handle merge of META-INF/services - see http://java.net/jira/browse/JERSEY-440?focusedCommentId=14822&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_14822 -->
106+
<transformer
107+
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
108+
<transformer
109+
implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
110+
<resource>reference.conf</resource>
111+
</transformer>
112+
<transformer
113+
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
114+
<manifestEntries>
115+
<Specification-Title>Java Advanced Imaging Image I/O Tools</Specification-Title>
116+
<Specification-Version>1.1</Specification-Version>
117+
<Specification-Vendor>Sun Microsystems, Inc.</Specification-Vendor>
118+
<Implementation-Title>com.sun.media.imageio</Implementation-Title>
119+
<Implementation-Version>1.1</Implementation-Version>
120+
<Implementation-Vendor>Sun Microsystems, Inc.</Implementation-Vendor>
121+
<Extension-Name>com.sun.media.imageio</Extension-Name>
122+
</manifestEntries>
123+
</transformer>
124+
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
125+
<mainClass>spark.SedonaGeoParquetMain</mainClass>
126+
</transformer>
127+
</transformers>
128+
<filters>
129+
<!-- filter to address "Invalid signature file" issue - see http://stackoverflow.com/a/6743609/589215 -->
130+
<filter>
131+
<artifact>*:*</artifact>
132+
<excludes>
133+
<exclude>META-INF/*.SF</exclude>
134+
<exclude>META-INF/*.DSA</exclude>
135+
<exclude>META-INF/*.RSA</exclude>
136+
</excludes>
137+
</filter>
138+
</filters>
139+
<argLine>
140+
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
141+
--add-opens=java.base/java.nio=ALL-UNNAMED
142+
--add-opens=java.base/java.lang=ALL-UNNAMED
143+
--add-opens=java.base/java.util=ALL-UNNAMED
144+
</argLine>
145+
</configuration>
146+
</execution>
147+
</executions>
148+
</plugin>
149+
<plugin>
150+
<groupId>com.diffplug.spotless</groupId>
151+
<artifactId>spotless-maven-plugin</artifactId>
152+
<version>${spotless.version}</version>
153+
<configuration>
154+
<formats>
155+
<!-- you can define as many formats as you want, each is independent -->
156+
<format>
157+
<!-- define the files to apply to -->
158+
<includes>
159+
<include>.gitattributes</include>
160+
<include>.gitignore</include>
161+
</includes>
162+
<!-- define the steps to apply to those files -->
163+
<trimTrailingWhitespace/>
164+
<endWithNewline/>
165+
<indent>
166+
<tabs>true</tabs>
167+
<spacesPerTab>4</spacesPerTab>
168+
</indent>
169+
</format>
170+
</formats>
171+
<!-- define a language-specific format -->
172+
<java>
173+
<googleJavaFormat>
174+
<version>1.10</version>
175+
<style>AOSP</style>
176+
<reflowLongStrings>true</reflowLongStrings>
177+
<formatJavadoc>false</formatJavadoc>
178+
</googleJavaFormat>
179+
</java>
180+
</configuration>
181+
</plugin>
182+
</plugins>
183+
</build>
184+
</project>
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package spark;
21+
22+
import org.apache.sedona.core.spatialOperator.RangeQuery;
23+
import org.apache.sedona.core.spatialOperator.SpatialPredicate;
24+
import org.apache.sedona.core.spatialRDD.SpatialRDD;
25+
import org.apache.sedona.sql.utils.Adapter;
26+
import org.apache.spark.sql.Dataset;
27+
import org.apache.spark.sql.Row;
28+
import org.apache.spark.sql.SparkSession;
29+
import org.locationtech.jts.geom.Coordinate;
30+
import org.locationtech.jts.geom.Geometry;
31+
import org.locationtech.jts.geom.GeometryFactory;
32+
import org.locationtech.jts.geom.Polygon;
33+
34+
import java.util.List;
35+
36+
37+
public class GeoParquetAccessor {
38+
39+
private final SparkSession session;
40+
private String parquetPath;
41+
42+
public GeoParquetAccessor() {
43+
this.session = new SedonaSparkSession().getSession();
44+
this.parquetPath = "";
45+
}
46+
47+
//Overload with constructor that has Spark session provided
48+
//Use to avoid error - can't have two SparkContext objects on one JVM
49+
public GeoParquetAccessor(SparkSession session, String parquetPath) {
50+
this.session = session;
51+
this.parquetPath = parquetPath;
52+
}
53+
54+
public List<Geometry> selectFeaturesByPolygon(double xmin, double ymax,
55+
double xmax, double ymin,
56+
String geometryColumn) {
57+
58+
//Read the GeoParquet file into a DataFrame
59+
Dataset<Row> insarDF = session.read().format("geoparquet").load(parquetPath);
60+
61+
//Convert the DataFrame to a SpatialRDD
62+
//The second argument to toSpatialRdd is the name of the geometry column.
63+
SpatialRDD<Geometry> insarRDD = Adapter.toSpatialRdd(insarDF, geometryColumn);
64+
65+
// Define the polygon for the spatial query
66+
GeometryFactory geometryFactory = new GeometryFactory();
67+
Coordinate[] coordinates = new Coordinate[] {
68+
new Coordinate(xmin, ymin),
69+
new Coordinate(xmax, ymin),
70+
new Coordinate(xmax, ymax),
71+
new Coordinate(xmin, ymax),
72+
new Coordinate(xmin, ymin) // A closed polygon has the same start and end coordinate
73+
};
74+
Polygon queryPolygon = geometryFactory.createPolygon(coordinates);
75+
76+
// Perform the spatial range query
77+
// This will return all geometries that intersect with the query polygon.
78+
// Alternatives are SpatialPredicate.CONTAINS or SpatialPredicate.WITHIN
79+
SpatialRDD<Geometry> resultRDD = new SpatialRDD<>();
80+
try {
81+
resultRDD.rawSpatialRDD = RangeQuery.SpatialRangeQuery(insarRDD, queryPolygon, SpatialPredicate.INTERSECTS, false);
82+
} catch (Exception e) {
83+
e.printStackTrace();
84+
}
85+
86+
// Collect the results back to the driver
87+
return resultRDD.getRawSpatialRDD().collect();
88+
}
89+
90+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package spark;
21+
22+
import org.locationtech.jts.geom.Coordinate;
23+
import org.locationtech.jts.geom.Geometry;
24+
25+
import java.io.IOException;
26+
import java.io.InputStream;
27+
import java.util.List;
28+
import java.util.Properties;
29+
30+
public class SedonaGeoParquetMain {
31+
32+
protected static Properties properties;
33+
protected static String parquetPath;
34+
protected static SedonaSparkSession session;
35+
36+
public static void main(String args[]) {
37+
38+
session = new SedonaSparkSession();
39+
//Get parquetPath and any other application.properties
40+
try {
41+
ClassLoader loader = Thread.currentThread().getContextClassLoader();
42+
Properties properties = new Properties();
43+
InputStream is = loader.getResourceAsStream("application.properties");
44+
properties.load(is);
45+
parquetPath = properties.getProperty("parquet.path");
46+
} catch (IOException e) {
47+
e.printStackTrace();
48+
parquetPath = "";
49+
}
50+
GeoParquetAccessor accessor = new GeoParquetAccessor(session.session, parquetPath);
51+
//Test parquet happens to be in New Zealand Transverse Mercator (EPSG:2193) (meters)
52+
List<Geometry> geoms = accessor.selectFeaturesByPolygon(1155850, 4819840, 1252000, 4748100, "geometry");
53+
System.out.println("Coordinates of convex hull of points in boundary:");
54+
for (Geometry geom : geoms) {
55+
Coordinate[] convexHullCoordinates = geom.convexHull().getCoordinates();
56+
for (Coordinate coord : convexHullCoordinates) {
57+
System.out.println(String.format("\t%s", coord.toString()));
58+
}
59+
}
60+
}
61+
}

0 commit comments

Comments
 (0)