1919#include <ucs/datastruct/khash.h>
2020#include <uct/cuda/base/cuda_ctx.inl>
2121
22+ #include <stdlib.h>
2223
2324typedef struct uct_cuda_ipc_cache_hash_key {
2425 pid_t pid ;
@@ -250,17 +251,14 @@ static void uct_cuda_ipc_cache_evict_lru(uct_cuda_ipc_cache_t *cache)
250251
251252static void uct_cuda_ipc_cache_purge (uct_cuda_ipc_cache_t * cache )
252253{
253- int active = uct_cuda_ctx_is_active ();
254254 uct_cuda_ipc_cache_region_t * region , * tmp ;
255255 ucs_list_link_t region_list ;
256256
257257 ucs_list_head_init (& region_list );
258258 ucs_pgtable_purge (& cache -> pgtable , uct_cuda_ipc_cache_region_collect_callback ,
259259 & region_list );
260260 ucs_list_for_each_safe (region , tmp , & region_list , list ) {
261- if (active ) {
262- uct_cuda_ipc_close_memhandle (region );
263- }
261+ uct_cuda_ipc_close_memhandle (region );
264262 ucs_free (region );
265263 }
266264
@@ -831,23 +829,8 @@ void uct_cuda_ipc_cache_set_global_limits(unsigned long max_regions,
831829 max_size );
832830}
833831
834- UCS_STATIC_INIT {
835- ucs_recursive_spinlock_init (& uct_cuda_ipc_remote_cache .lock , 0 );
836- kh_init_inplace (cuda_ipc_rem_cache , & uct_cuda_ipc_remote_cache .hash );
837- uct_cuda_ipc_remote_cache .max_regions = ULONG_MAX ;
838- uct_cuda_ipc_remote_cache .max_size = SIZE_MAX ;
839-
840- #if HAVE_CUDA_FABRIC
841- pthread_rwlock_init (& uct_cuda_ipc_rem_mpool_cache .lock , NULL );
842- /* Assumption: If import process succeeds, then the two nodes are in the
843- * same domain. Within a domain, fabric handles are expected to be unique.
844- * For this reason, there is no need to maintain a hashmap per peer OS as
845- * key collisions are not expected to occur. */
846- kh_init_inplace (cuda_ipc_rem_mpool_cache , & uct_cuda_ipc_rem_mpool_cache .hash );
847- #endif
848- }
849-
850- UCS_STATIC_CLEANUP {
832+ static void uct_cuda_ipc_cleanup_atexit (void )
833+ {
851834 uct_cuda_ipc_cache_t * rem_cache ;
852835
853836#if HAVE_CUDA_FABRIC
@@ -867,3 +850,25 @@ UCS_STATIC_CLEANUP {
867850 kh_destroy_inplace (cuda_ipc_rem_cache , & uct_cuda_ipc_remote_cache .hash );
868851 ucs_recursive_spinlock_destroy (& uct_cuda_ipc_remote_cache .lock );
869852}
853+
854+ UCS_STATIC_INIT
855+ {
856+ ucs_recursive_spinlock_init (& uct_cuda_ipc_remote_cache .lock , 0 );
857+ kh_init_inplace (cuda_ipc_rem_cache , & uct_cuda_ipc_remote_cache .hash );
858+ uct_cuda_ipc_remote_cache .max_regions = ULONG_MAX ;
859+ uct_cuda_ipc_remote_cache .max_size = SIZE_MAX ;
860+
861+ #if HAVE_CUDA_FABRIC
862+ pthread_rwlock_init (& uct_cuda_ipc_rem_mpool_cache .lock , NULL );
863+ /* Assumption: If import process succeeds, then the two nodes are in the
864+ * same domain. Within a domain, fabric handles are expected to be unique.
865+ * For this reason, there is no need to maintain a hashmap per peer OS as
866+ * key collisions are not expected to occur. */
867+ kh_init_inplace (cuda_ipc_rem_mpool_cache ,
868+ & uct_cuda_ipc_rem_mpool_cache .hash );
869+ #endif
870+
871+ /* Cleanup at process exit while CUDA driver is still alive;
872+ * UCS_STATIC_CLEANUP may run after CUDA is deinitialized. */
873+ atexit (uct_cuda_ipc_cleanup_atexit );
874+ }
0 commit comments