Skip to content

Commit c1efb06

Browse files
author
naganomei
committed
Add Cache IO aggregation
1 parent cb40927 commit c1efb06

16 files changed

Lines changed: 909 additions & 65 deletions

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ option(BUILD_UCM_MINDIE "build ucm MindIE integration module." OFF)
1313
option(BUILD_UNIT_TESTS "build all unit test suits." OFF)
1414
option(BUILD_NUMA "build numactl library." OFF)
1515
option(UCM_ENABLE_GDR_STREAM "build GPUDirect RDMA stream support for CUDA runtime." OFF)
16+
option(UCM_ENABLE_ASCEND_IO_AGGREGATION "build Ascend Cache IO aggregation support." OFF)
1617
option(ASCEND_SUPPORTS_REGISTER_PIN "enable Ascend register pin optimization (requires CANN >= 8.5)" OFF)
1718
option(DOWNLOAD_DEPENDENCE "download dependence by cmake." ON)
1819
set(RUNTIME_ENVIRONMENT "simu" CACHE STRING "runtime: simu, ascend, musa or cuda.")

examples/ucm_config_example.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ ucm_connectors:
2424
# and building UCM with ENABLE_GDR=1.
2525
use_gdr: false
2626

27+
# Enable Cache IO aggregation for Ascend small-IO CacheStore H2D and D2H copies.
28+
# This aggregates each shard into a single IO object, reducing the number of small transfers.
29+
# This requires an Ascend runtime and a UCM build with UCM_ENABLE_ASCEND_IO_AGGREGATION=1.
30+
cache_io_aggregation: false
31+
2732
# When you use UcmNfsStore, you should set enable_event_sync to false.
2833
enable_event_sync: true
2934
# Enable UCM metrics so they can be monitored online via Grafana and Prometheus.

setup.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,14 @@
3535
ENABLE_SPARSE = os.getenv("ENABLE_SPARSE")
3636
ENABLE_MINDIE = os.getenv("UCM_ENABLE_MINDIE", "0") not in ("", "0", "false", "False")
3737
ENABLE_GDR = os.getenv("ENABLE_GDR", "0") not in ("", "0", "false", "False")
38-
38+
ENABLE_ASCEND_IO_AGGREGATION = os.getenv(
39+
"UCM_ENABLE_ASCEND_IO_AGGREGATION", "0"
40+
) not in (
41+
"",
42+
"0",
43+
"false",
44+
"False",
45+
)
3946

4047
def get_abi_flag_from_env() -> str:
4148
v = os.environ.get("UCM_CXX11_ABI")
@@ -171,6 +178,10 @@ def build_cmake(self, ext: CMakeExtension):
171178
if ENABLE_GDR:
172179
cmake_args += ["-DUCM_ENABLE_GDR_STREAM=ON"]
173180

181+
cmake_args += [
182+
f"-DUCM_ENABLE_ASCEND_IO_AGGREGATION={'ON' if ENABLE_ASCEND_IO_AGGREGATION else 'OFF'}"
183+
]
184+
174185
match PLATFORM:
175186
case "cuda":
176187
cmake_args += ["-DRUNTIME_ENVIRONMENT=cuda"]

ucm/shared/trans/ascend/CMakeLists.txt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,54 @@ add_library(trans STATIC
1212
if(ASCEND_SUPPORTS_REGISTER_PIN)
1313
target_compile_definitions(trans PRIVATE ASCEND_SUPPORTS_REGISTER_PIN=1)
1414
endif()
15+
if(UCM_ENABLE_ASCEND_IO_AGGREGATION)
16+
find_library(ASCEND_RUNTIME_LIBRARY runtime
17+
PATHS
18+
"${ASCEND_ROOT}/lib64"
19+
"${ASCEND_ROOT}/lib"
20+
"${ASCEND_ROOT}/runtime/lib64"
21+
"${ASCEND_ROOT}/runtime/lib"
22+
"${ASCEND_ROOT}/aarch64-linux/lib64"
23+
"${ASCEND_ROOT}/aarch64-linux/lib"
24+
NO_DEFAULT_PATH
25+
)
26+
find_path(ASCEND_FFTS_INCLUDE_DIR
27+
NAMES runtime/rt_ffts_plus.h rt_external_ffts.h
28+
PATHS
29+
"${ASCEND_ROOT}/include"
30+
"${ASCEND_ROOT}/pkg_inc"
31+
"${ASCEND_ROOT}/pkg_inc/runtime"
32+
"${ASCEND_ROOT}/aarch64-linux/pkg_inc"
33+
"${ASCEND_ROOT}/aarch64-linux/pkg_inc/runtime"
34+
NO_DEFAULT_PATH
35+
)
36+
if(NOT ASCEND_RUNTIME_LIBRARY OR NOT ASCEND_FFTS_INCLUDE_DIR)
37+
message(FATAL_ERROR "UCM_ENABLE_ASCEND_IO_AGGREGATION requires FFTS headers and libruntime.")
38+
endif()
39+
get_filename_component(ASCEND_FFTS_INCLUDE_PARENT "${ASCEND_FFTS_INCLUDE_DIR}" DIRECTORY)
40+
set(ASCEND_FFTS_INCLUDE_DIRS "${ASCEND_FFTS_INCLUDE_DIR}")
41+
if(ASCEND_FFTS_INCLUDE_DIR MATCHES "/runtime$" AND EXISTS "${ASCEND_FFTS_INCLUDE_PARENT}")
42+
list(APPEND ASCEND_FFTS_INCLUDE_DIRS "${ASCEND_FFTS_INCLUDE_PARENT}")
43+
endif()
44+
list(APPEND ASCEND_FFTS_INCLUDE_DIRS
45+
"${ASCEND_ROOT}/pkg_inc"
46+
"${ASCEND_ROOT}/pkg_inc/toolchain"
47+
"${ASCEND_ROOT}/pkg_inc/profiling"
48+
"${ASCEND_ROOT}/aarch64-linux/pkg_inc"
49+
"${ASCEND_ROOT}/aarch64-linux/pkg_inc/toolchain"
50+
"${ASCEND_ROOT}/aarch64-linux/pkg_inc/profiling"
51+
)
52+
list(REMOVE_DUPLICATES ASCEND_FFTS_INCLUDE_DIRS)
53+
target_sources(trans PRIVATE
54+
ascend_shard_io_aggregator.cc
55+
ffts_d2d_dispatcher.cc
56+
)
57+
target_include_directories(trans PUBLIC ${ASCEND_FFTS_INCLUDE_DIRS})
58+
target_compile_definitions(trans PUBLIC UCM_ENABLE_ASCEND_IO_AGGREGATION=1)
59+
target_link_libraries(trans PUBLIC ${ASCEND_RUNTIME_LIBRARY})
60+
else()
61+
target_compile_definitions(trans PUBLIC UCM_ENABLE_ASCEND_IO_AGGREGATION=0)
62+
endif()
1563
target_link_libraries(trans PUBLIC
1664
fmt
1765
Ascend::ascendcl

0 commit comments

Comments
 (0)