Skip to content

Commit e39fc3f

Browse files
committed
[build] Improve finding Spark-compatible Java version
Different Spark versions support different versions of Java. The latest versions of Spark support the current LTS Java versions 17 and 21. In any case, no Spark version supports a Java version greater than 21. Thus, we should check in CMake that the Java version installed in the environment is within a reasonable range. Since the CMake FindJava module does not support version ranges, we introduce an extra check that raises an error when too recent a version of Java is detected. This helps fail early on CI nodes where an incompatible Java version is installed.
1 parent 1bc9b93 commit e39fc3f

1 file changed

Lines changed: 24 additions & 22 deletions

File tree

cmake/modules/FindPySpark.cmake

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,34 +23,36 @@
2323

2424
message(STATUS "Looking for PySpark dependency: Java")
2525
if(PySpark_FIND_REQUIRED)
26-
find_package(Java 1.8 REQUIRED COMPONENTS Runtime)
26+
find_package(Java 17 REQUIRED COMPONENTS Runtime)
2727
else()
28-
find_package(Java 1.8 COMPONENTS Runtime)
28+
find_package(Java 17 COMPONENTS Runtime)
2929
endif()
3030

3131
if(Java_FOUND)
32-
message(STATUS "Found Java ${Java_JAVA_EXECUTABLE}")
33-
message(STATUS "Java version ${Java_VERSION_STRING}")
34-
35-
# Import pyspark using the main Python executable, print its version and path to the __init__.py file
36-
execute_process(
37-
COMMAND ${Python3_EXECUTABLE} -c "import pyspark; print(pyspark.__version__)"
38-
RESULT_VARIABLE _PYSPARK_IMPORT_EXIT_STATUS
39-
OUTPUT_VARIABLE _PYSPARK_VALUES_OUTPUT
40-
ERROR_VARIABLE _PYSPARK_ERROR_VALUE
41-
OUTPUT_STRIP_TRAILING_WHITESPACE
42-
)
43-
44-
# Exit status equal to zero means success
45-
if(_PYSPARK_IMPORT_EXIT_STATUS EQUAL 0)
46-
# Build the version string
47-
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PySpark_VERSION_STRING "${_PYSPARK_VALUES_OUTPUT}")
48-
# Signal to CMake that the environment could import pyspark and Java runtime was found
49-
set(PySpark_DEPENDENCIES_READY TRUE)
32+
if("${Java_VERSION_MAJOR}" VERSION_GREATER 21)
33+
# The FindJava module does not support find_package version ranges, so the upper bound is checked by hand. The expansion is quoted so that an empty Java_VERSION_MAJOR cannot break the if() syntax.
34+
message(FATAL_ERROR "Currently, there are no Spark versions that support Java version greater than 21. Found Java version ${Java_VERSION_STRING}.")
5035
else()
51-
message(STATUS "Python package 'pyspark' could not be imported with ${Python3_EXECUTABLE}\n"
52-
"${_PYSPARK_ERROR_VALUE}"
36+
# Import pyspark using the main Python executable and print its version.
# The exit status, stdout and stderr of the interpreter are captured in the _PYSPARK_* variables named below.
37+
execute_process(
38+
COMMAND ${Python3_EXECUTABLE} -c "import pyspark; print(pyspark.__version__)"
39+
RESULT_VARIABLE _PYSPARK_IMPORT_EXIT_STATUS
40+
OUTPUT_VARIABLE _PYSPARK_VALUES_OUTPUT
41+
ERROR_VARIABLE _PYSPARK_ERROR_VALUE
42+
OUTPUT_STRIP_TRAILING_WHITESPACE
5343
)
44+
45+
# Exit status equal to zero means success
46+
if(_PYSPARK_IMPORT_EXIT_STATUS EQUAL 0)
47+
# Build the version string
48+
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PySpark_VERSION_STRING "${_PYSPARK_VALUES_OUTPUT}")
49+
# Signal to CMake that the environment could import pyspark and Java runtime was found
50+
set(PySpark_DEPENDENCIES_READY TRUE)
51+
else()
52+
message(STATUS "Python package 'pyspark' could not be imported with ${Python3_EXECUTABLE}\n"
53+
"${_PYSPARK_ERROR_VALUE}"
54+
)
55+
endif()
5456
endif()
5557

5658
find_package_handle_standard_args(PySpark

0 commit comments

Comments
 (0)