Skip to content

Commit 2b1803a

Browse files
authored
[fix](paimon-cpp) deduplicate Arrow linking to fix SIGSEGV in FilterRowGroupsByPredicate (#60883)
## Proposed changes

### Problem

When `ENABLE_PAIMON_CPP` is ON, both Doris's own `libarrow.a` and paimon-cpp's `libarrow.a` are linked into `doris_be`, causing **3698 duplicate global symbols**. This leads to **SIGSEGV crashes** in `paimon::parquet::ParquetFileBatchReader::FilterRowGroupsByPredicate` when `libarrow_dataset.a` resolves arrow core calls to the wrong copy (compiled with different feature flags).

Both are Arrow 17.0.0 but compiled with different options:

| Feature | Doris Arrow | paimon Arrow |
|---|---|---|
| COMPUTE | OFF | **ON** |
| DATASET | OFF | **ON** |
| ACERO | OFF | **ON** |
| FILESYSTEM | OFF | **ON** |
| FLIGHT | **ON** | OFF |
| FLIGHT_SQL | **ON** | OFF |
| PARQUET | ON | ON |

### Crash Stack

```
SIGSEGV invalid permissions for mapped object
→ std::string::basic_string(char const*, ...)
→ paimon::ToPaimonStatus(arrow::Status const&)
→ paimon::parquet::ParquetFileBatchReader::FilterRowGroupsByPredicate(...)
```

### Root Cause

Inside `-Wl,--start-group ... --end-group`, the linker may resolve symbols from `libarrow_dataset.a` (paimon's) to Doris's `libarrow.a`, which was compiled without COMPUTE/FILESYSTEM modules. The internal object memory layout differs, causing `arrow::Status` and other objects to trigger illegal memory access when passed across library boundaries.

### Fix

When the `paimon_deps` Arrow stack is selected (because Doris lacks `libarrow_dataset.a` / `libarrow_acero.a`), remove Doris's `arrow` from `COMMON_THIRDPARTY`. paimon's `libarrow.a` is a **superset** of Doris's version (same 17.0.0, with additional modules enabled), so it provides all symbols needed by Doris's `libarrow_flight.a` / `libarrow_flight_sql.a`.

### Impact

- Only `be/CMakeLists.txt` changed (~10 lines).
- No C++/Java business code changes.
- No impact when `ENABLE_PAIMON_CPP=OFF`.

## Types of changes

- [x] Bug fix (non-breaking change which fixes an issue)
1 parent 6d5ebe0 commit 2b1803a

File tree

2 files changed

+11
-82
lines changed

2 files changed

+11
-82
lines changed

be/CMakeLists.txt

Lines changed: 9 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -623,77 +623,16 @@ if (BUILD_BENCHMARK)
623623
endif()
624624

625625
set(PAIMON_FACTORY_REGISTRY_LIBS)
626-
set(PAIMON_ARROW_CORE_LIB)
627-
set(PAIMON_ARROW_FILESYSTEM_LIB)
628-
set(PAIMON_ARROW_DATASET_LIB)
629-
set(PAIMON_ARROW_ACERO_LIB)
630626
if (ENABLE_PAIMON_CPP)
631-
set(_paimon_arrow_core_candidates
632-
${THIRDPARTY_DIR}/paimon-cpp/lib64/paimon_deps/libarrow.a
633-
${THIRDPARTY_DIR}/lib64/libarrow.a
634-
${THIRDPARTY_DIR}/lib/libarrow.a
635-
)
636-
foreach(_paimon_arrow_core_candidate IN LISTS _paimon_arrow_core_candidates)
637-
if (EXISTS "${_paimon_arrow_core_candidate}")
638-
add_library(paimon_arrow_core STATIC IMPORTED)
639-
set_target_properties(paimon_arrow_core PROPERTIES
640-
IMPORTED_LOCATION ${_paimon_arrow_core_candidate})
641-
set(PAIMON_ARROW_CORE_LIB paimon_arrow_core)
642-
break()
643-
endif()
644-
endforeach()
645-
set(_paimon_arrow_filesystem_candidates
646-
${THIRDPARTY_DIR}/paimon-cpp/lib64/paimon_deps/libarrow_filesystem.a
647-
${THIRDPARTY_DIR}/lib64/libarrow_filesystem.a
648-
${THIRDPARTY_DIR}/lib/libarrow_filesystem.a
649-
)
650-
foreach(_paimon_arrow_filesystem_candidate IN LISTS _paimon_arrow_filesystem_candidates)
651-
if (EXISTS "${_paimon_arrow_filesystem_candidate}")
652-
add_library(paimon_arrow_filesystem STATIC IMPORTED)
653-
set_target_properties(paimon_arrow_filesystem PROPERTIES
654-
IMPORTED_LOCATION ${_paimon_arrow_filesystem_candidate})
655-
set(PAIMON_ARROW_FILESYSTEM_LIB paimon_arrow_filesystem)
656-
break()
657-
endif()
658-
endforeach()
659-
set(_paimon_arrow_dataset_candidates
660-
${THIRDPARTY_DIR}/paimon-cpp/lib64/paimon_deps/libarrow_dataset.a
661-
${THIRDPARTY_DIR}/lib64/libarrow_dataset.a
662-
${THIRDPARTY_DIR}/lib/libarrow_dataset.a
663-
)
664-
foreach(_paimon_arrow_dataset_candidate IN LISTS _paimon_arrow_dataset_candidates)
665-
if (EXISTS "${_paimon_arrow_dataset_candidate}")
666-
add_library(paimon_arrow_dataset STATIC IMPORTED)
667-
set_target_properties(paimon_arrow_dataset PROPERTIES
668-
IMPORTED_LOCATION ${_paimon_arrow_dataset_candidate})
669-
set(PAIMON_ARROW_DATASET_LIB paimon_arrow_dataset)
670-
break()
671-
endif()
672-
endforeach()
673-
set(_paimon_arrow_acero_candidates
674-
${THIRDPARTY_DIR}/paimon-cpp/lib64/paimon_deps/libarrow_acero.a
675-
${THIRDPARTY_DIR}/lib64/libarrow_acero.a
676-
${THIRDPARTY_DIR}/lib/libarrow_acero.a
677-
)
678-
foreach(_paimon_arrow_acero_candidate IN LISTS _paimon_arrow_acero_candidates)
679-
if (EXISTS "${_paimon_arrow_acero_candidate}")
680-
add_library(paimon_arrow_acero STATIC IMPORTED)
681-
set_target_properties(paimon_arrow_acero PROPERTIES
682-
IMPORTED_LOCATION ${_paimon_arrow_acero_candidate})
683-
set(PAIMON_ARROW_ACERO_LIB paimon_arrow_acero)
684-
break()
685-
endif()
686-
endforeach()
687-
if (PAIMON_ARROW_DATASET_LIB)
688-
# paimon_parquet_file_format depends on Arrow Dataset symbols.
689-
# Force-link it only when arrow_dataset is available.
690-
set(PAIMON_FACTORY_REGISTRY_LIBS
691-
paimon_parquet_file_format
692-
)
693-
list(REMOVE_ITEM COMMON_THIRDPARTY ${PAIMON_FACTORY_REGISTRY_LIBS})
694-
else()
695-
message(STATUS "Paimon C++: libarrow_dataset.a not found, keep paimon_parquet_file_format as regular static lib")
696-
endif()
627+
# Doris's own Arrow is now built with COMPUTE/DATASET/ACERO/FILESYSTEM enabled,
628+
# so arrow, arrow_dataset, and arrow_acero are all in COMMON_THIRDPARTY via
629+
# thirdparty.cmake. paimon-cpp reuses that same Arrow build (no paimon_deps copy),
630+
# so no dual-stack selection is needed: a single Arrow serves everything.
631+
632+
# paimon_parquet_file_format depends on Arrow Dataset symbols.
633+
# Force-link it with --whole-archive so its factory registration runs.
634+
set(PAIMON_FACTORY_REGISTRY_LIBS paimon_parquet_file_format)
635+
list(REMOVE_ITEM COMMON_THIRDPARTY ${PAIMON_FACTORY_REGISTRY_LIBS})
697636
endif()
698637

699638
set(DORIS_DEPENDENCIES
@@ -720,18 +659,6 @@ if (ENABLE_PAIMON_CPP)
720659
${PAIMON_FACTORY_REGISTRY_LIBS}
721660
-Wl,--no-whole-archive)
722661
endif()
723-
if (PAIMON_ARROW_CORE_LIB)
724-
set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${PAIMON_ARROW_CORE_LIB})
725-
endif()
726-
if (PAIMON_ARROW_FILESYSTEM_LIB)
727-
set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${PAIMON_ARROW_FILESYSTEM_LIB})
728-
endif()
729-
if (PAIMON_ARROW_DATASET_LIB)
730-
set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${PAIMON_ARROW_DATASET_LIB})
731-
endif()
732-
if (PAIMON_ARROW_ACERO_LIB)
733-
set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${PAIMON_ARROW_ACERO_LIB})
734-
endif()
735662

736663
# paimon-cpp internal dependencies (renamed with _paimon suffix)
737664
# These must come after paimon libraries to resolve symbols.

be/cmake/thirdparty.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ add_thirdparty(zstd LIB64)
106106
add_thirdparty(arrow LIB64)
107107
add_thirdparty(arrow_flight LIB64)
108108
add_thirdparty(arrow_flight_sql LIB64)
109+
add_thirdparty(arrow_dataset LIB64)
110+
add_thirdparty(arrow_acero LIB64)
109111
add_thirdparty(parquet LIB64)
110112
add_thirdparty(brpc LIB64)
111113
add_thirdparty(rocksdb)

0 commit comments

Comments
 (0)