Skip to content

Commit cfea02e

Browse files
committed
build: throttle torch source compilation via Ninja job pool
Each generated `<op>.cc` instantiates `at::<op>_out(...)`, which expands roughly 0.5-1 GB of ATen template metaprogramming. With 451 ops compiled in parallel at Ninja's default `-j$(nproc)`, peak memory can exceed 30 GB and the OOM killer drops `cc1plus` on build hosts that allocate less RAM (observed on metax, moore, and cambricon CI containers).

Add a Ninja job pool `torch_compile=4` and apply it to:
- the vendor-system-g++ `add_custom_command` recompile loop (metax / moore), via `JOB_POOL`;
- a new `infiniops_torch_objs` OBJECT library for the regular cmake build path (cambricon / nvidia / iluvatar), via `JOB_POOL_COMPILE`.

The rest of the build keeps full parallelism.
1 parent 800619d commit cfea02e

1 file changed

Lines changed: 24 additions & 1 deletion

File tree

src/CMakeLists.txt

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,17 @@ if(WITH_TORCH)
279279
${PROJECT_SOURCE_DIR}/generated
280280
)
281281

282+
# Each generated `<op>.cc` instantiates `at::<op>_out(...)`, which
283+
# pulls in roughly 0.5-1 GB of ATen template metaprogramming. At
284+
# ninja's default parallelism (one job per CPU), a build with 451
285+
# ops can blow past 30 GB of RSS and the OOM killer drops
286+
# `cc1plus`. Cap the heavyweight torch sources to 4 concurrent
287+
# compilations via a Ninja job pool; the rest of the build keeps
288+
# full parallelism.
289+
if(CMAKE_GENERATOR MATCHES "Ninja")
290+
set_property(GLOBAL APPEND PROPERTY JOB_POOLS torch_compile=4)
291+
endif()
292+
282293
if(WITH_METAX OR WITH_MOORE)
283294
# Vendor compilers (`mxcc`/`mcc`) cannot compile vendor-forked `torch`
284295
# headers. Compile `torch` sources with the system C++ compiler instead.
@@ -330,6 +341,7 @@ if(WITH_TORCH)
330341
-c "${_src}" -o "${_obj}"
331342
DEPENDS "${_src}"
332343
COMMENT "Compiling ${_rel} with system C++ compiler"
344+
JOB_POOL torch_compile
333345
)
334346
list(APPEND TORCH_OBJECT_FILES "${_obj}")
335347
endforeach()
@@ -338,7 +350,18 @@ if(WITH_TORCH)
338350
PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE)
339351
target_sources(infiniops PRIVATE ${TORCH_OBJECT_FILES})
340352
else()
341-
target_sources(infiniops PRIVATE ${TORCH_SOURCES})
353+
# Build the heavy torch sources as their own object library so
354+
# the Ninja `torch_compile` job pool throttles only those
355+
# compilations and the rest of `infiniops` keeps full
356+
# parallelism.
357+
add_library(infiniops_torch_objs OBJECT ${TORCH_SOURCES})
358+
# Give the torch sources the same compile settings they had when they
# were direct sources of `infiniops`. Do NOT link the object library
# against `infiniops`: `infiniops` consumes
# `$<TARGET_OBJECTS:infiniops_torch_objs>`, so a link edge back to it
# forms a target dependency cycle, which CMake only tolerates among
# static libraries. The `TARGET_PROPERTY` generator expressions below
# read the build properties of `infiniops` (including usage requirements
# of the libraries it links) without adding a build-order dependency.
target_include_directories(infiniops_torch_objs PRIVATE
    "$<TARGET_PROPERTY:infiniops,INCLUDE_DIRECTORIES>")
target_compile_definitions(infiniops_torch_objs PRIVATE
    "$<TARGET_PROPERTY:infiniops,COMPILE_DEFINITIONS>")
target_compile_options(infiniops_torch_objs PRIVATE
    "$<TARGET_PROPERTY:infiniops,COMPILE_OPTIONS>")
target_compile_features(infiniops_torch_objs PRIVATE
    "$<TARGET_PROPERTY:infiniops,COMPILE_FEATURES>")
# Object files that end up in a shared `infiniops` must be built as
# position-independent code; OBJECT libraries do not enable it on
# their own, so mirror whatever `infiniops` uses.
get_target_property(_infiniops_pic infiniops POSITION_INDEPENDENT_CODE)
if(_infiniops_pic)
    set_target_properties(infiniops_torch_objs
        PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
359+
if(CMAKE_GENERATOR MATCHES "Ninja")
360+
set_target_properties(infiniops_torch_objs
361+
PROPERTIES JOB_POOL_COMPILE torch_compile)
362+
endif()
363+
target_sources(infiniops PRIVATE
364+
$<TARGET_OBJECTS:infiniops_torch_objs>)
342365
endif()
343366
endif()
344367

0 commit comments

Comments
 (0)