Skip to content

Commit 2b54d28

Browse files
committed
BUILD: cuda early start
Signed-off-by: Ilya Kryukov <ikryukov@nvidia.com>
1 parent ac9cb19 commit 2b54d28

3 files changed

Lines changed: 41 additions & 1 deletion

File tree

Makefile.am

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,14 @@ clean-local: $(PARALLEL_SUBDIRS:%=clean-par/%)
3434
clean-par/%:
3535
+$(MAKE) -C $* clean
3636

37+
uninstall-local: $(PARALLEL_SUBDIRS:%=uninstall-par/%)
38+
uninstall-par/%:
39+
+$(MAKE) -C $* uninstall
40+
41+
check-local: $(PARALLEL_SUBDIRS:%=check-par/%)
42+
check-par/%:
43+
+$(MAKE) -C $* check
44+
3745
distclean-local: $(PARALLEL_SUBDIRS:%=distclean-par/%)
3846
distclean-par/%:
3947
-+$(MAKE) -C $* distclean

src/Makefile.am

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,18 @@ COMPONENT_DIRS = $(cl_dirs) $(mc_dirs) $(ec_dirs) $(topo_dirs)
3838

3939
include components/tl/makefile.am
4040

41+
# Kernel-only subdirectories whose .cu compilation can overlap with the build of libucc.la.
42+
# NVCC only needs headers (source tree + config.h from configure), so there is
43+
# no build-time dependency on the compiled core library. The final link of the
44+
# parent component (libucc_ec_cuda.la, etc.) still waits for libucc.la below.
45+
early_cuda_kernel_dirs =
46+
if HAVE_CUDA
47+
early_cuda_kernel_dirs += components/ec/cuda/kernel
48+
endif
49+
if TL_CUDA_NVLS_ENABLED
50+
early_cuda_kernel_dirs += components/tl/cuda/kernels
51+
endif
52+
4153
# All possible component subdirectories (including conditional ones)
4254
# needed for make dist to find all source files.
4355
DIST_SUBDIRS = . \
@@ -60,10 +72,25 @@ DIST_SUBDIRS = . \
6072
components/topo/cuda \
6173
components/topo/ib
6274

63-
all-local: $(COMPONENT_DIRS:%=all-component/%)
75+
all-local: $(early_cuda_kernel_dirs:%=compile-kernels/%) $(COMPONENT_DIRS:%=all-component/%)
76+
77+
# Start CUDA kernel compilation immediately (no libucc.la dependency).
78+
compile-kernels/%:
79+
+$(MAKE) -C $* all
80+
81+
# Component sub-makes start only after libucc.la is built (their link step needs it).
6482
all-component/%: libucc.la
6583
+$(MAKE) -C $* all
6684

85+
# Components that contain CUDA kernels must also wait for the early kernel
86+
# compilation to finish, so two make processes never run in the same directory.
87+
if HAVE_CUDA
88+
all-component/components/ec/cuda: compile-kernels/components/ec/cuda/kernel
89+
endif
90+
if TL_CUDA_NVLS_ENABLED
91+
all-component/components/tl/cuda: compile-kernels/components/tl/cuda/kernels
92+
endif
93+
6794
# Component installs must wait for libucc.la to be installed to the prefix
6895
# first, otherwise libtool relinking fails with "cannot find -lucc".
6996
install-exec-local: install-libLTLIBRARIES $(COMPONENT_DIRS:%=install-component/%)
@@ -74,6 +101,10 @@ clean-local: $(COMPONENT_DIRS:%=clean-component/%)
74101
clean-component/%:
75102
+$(MAKE) -C $* clean
76103

104+
uninstall-local: $(COMPONENT_DIRS:%=uninstall-component/%)
105+
uninstall-component/%:
106+
+$(MAKE) -C $* uninstall
107+
77108
distclean-local: $(COMPONENT_DIRS:%=distclean-component/%)
78109
distclean-component/%:
79110
-+$(MAKE) -C $* distclean

src/components/ec/cuda/kernel/ec_cuda_reduce.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
extern "C" {
1313
#include "../ec_cuda.h"
14+
#include "utils/ucc_math.h"
1415
}
1516

1617
extern "C" {

0 commit comments

Comments
 (0)