Skip to content

Commit f8a1d9f

Browse files
committed
UCT/CUDA/CUDA_IPC: Fixed cuda_ipc_cache cleanup at process termination.
1 parent 93d3c2a commit f8a1d9f

1 file changed

Lines changed: 26 additions & 21 deletions

File tree

src/uct/cuda/cuda_ipc/cuda_ipc_cache.c

Lines changed: 26 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
#include <ucs/datastruct/khash.h>
2020
#include <uct/cuda/base/cuda_ctx.inl>
2121

22+
#include <stdlib.h>
2223

2324
typedef struct uct_cuda_ipc_cache_hash_key {
2425
pid_t pid;
@@ -250,17 +251,14 @@ static void uct_cuda_ipc_cache_evict_lru(uct_cuda_ipc_cache_t *cache)
250251

251252
static void uct_cuda_ipc_cache_purge(uct_cuda_ipc_cache_t *cache)
252253
{
253-
int active = uct_cuda_ctx_is_active();
254254
uct_cuda_ipc_cache_region_t *region, *tmp;
255255
ucs_list_link_t region_list;
256256

257257
ucs_list_head_init(&region_list);
258258
ucs_pgtable_purge(&cache->pgtable, uct_cuda_ipc_cache_region_collect_callback,
259259
&region_list);
260260
ucs_list_for_each_safe(region, tmp, &region_list, list) {
261-
if (active) {
262-
uct_cuda_ipc_close_memhandle(region);
263-
}
261+
uct_cuda_ipc_close_memhandle(region);
264262
ucs_free(region);
265263
}
266264

@@ -831,23 +829,8 @@ void uct_cuda_ipc_cache_set_global_limits(unsigned long max_regions,
831829
max_size);
832830
}
833831

834-
UCS_STATIC_INIT {
835-
ucs_recursive_spinlock_init(&uct_cuda_ipc_remote_cache.lock, 0);
836-
kh_init_inplace(cuda_ipc_rem_cache, &uct_cuda_ipc_remote_cache.hash);
837-
uct_cuda_ipc_remote_cache.max_regions = ULONG_MAX;
838-
uct_cuda_ipc_remote_cache.max_size = SIZE_MAX;
839-
840-
#if HAVE_CUDA_FABRIC
841-
pthread_rwlock_init(&uct_cuda_ipc_rem_mpool_cache.lock, NULL);
842-
/* Assumption: If import process succeeds, then the two nodes are in the
843-
* same domain. Within a domain, fabric handles are expected to be unique.
844-
* For this reason, there is no need to maintain a hashmap per peer OS as
845-
* key collisions are not expected to occur. */
846-
kh_init_inplace(cuda_ipc_rem_mpool_cache, &uct_cuda_ipc_rem_mpool_cache.hash);
847-
#endif
848-
}
849-
850-
UCS_STATIC_CLEANUP {
832+
static void uct_cuda_ipc_cleanup_atexit(void)
833+
{
851834
uct_cuda_ipc_cache_t *rem_cache;
852835

853836
#if HAVE_CUDA_FABRIC
@@ -867,3 +850,25 @@ UCS_STATIC_CLEANUP {
867850
kh_destroy_inplace(cuda_ipc_rem_cache, &uct_cuda_ipc_remote_cache.hash);
868851
ucs_recursive_spinlock_destroy(&uct_cuda_ipc_remote_cache.lock);
869852
}
853+
854+
UCS_STATIC_INIT
855+
{
856+
ucs_recursive_spinlock_init(&uct_cuda_ipc_remote_cache.lock, 0);
857+
kh_init_inplace(cuda_ipc_rem_cache, &uct_cuda_ipc_remote_cache.hash);
858+
uct_cuda_ipc_remote_cache.max_regions = ULONG_MAX;
859+
uct_cuda_ipc_remote_cache.max_size = SIZE_MAX;
860+
861+
#if HAVE_CUDA_FABRIC
862+
pthread_rwlock_init(&uct_cuda_ipc_rem_mpool_cache.lock, NULL);
863+
/* Assumption: If import process succeeds, then the two nodes are in the
864+
* same domain. Within a domain, fabric handles are expected to be unique.
865+
* For this reason, there is no need to maintain a hashmap per peer OS as
866+
* key collisions are not expected to occur. */
867+
kh_init_inplace(cuda_ipc_rem_mpool_cache,
868+
&uct_cuda_ipc_rem_mpool_cache.hash);
869+
#endif
870+
871+
/* Cleanup at process exit while CUDA driver is still alive;
872+
* UCS_STATIC_CLEANUP may run after CUDA is deinitialized. */
873+
atexit(uct_cuda_ipc_cleanup_atexit);
874+
}

0 commit comments

Comments
 (0)