1313
1414ucc_status_t ucc_tl_ucp_alltoallv_onesided_start (ucc_coll_task_t * ctask )
1515{
16- ucc_tl_ucp_task_t * task = ucc_derived_of (ctask , ucc_tl_ucp_task_t );
17- ucc_tl_ucp_team_t * team = TASK_TEAM (task );
18- ptrdiff_t src = (ptrdiff_t )TASK_ARGS (task ).src .info_v .buffer ;
19- ptrdiff_t dest = (ptrdiff_t )TASK_ARGS (task ).dst .info_v .buffer ;
20- ucc_rank_t grank = UCC_TL_TEAM_RANK (team );
21- ucc_rank_t gsize = UCC_TL_TEAM_SIZE (team );
22- long * pSync = TASK_ARGS (task ).global_work_buffer ;
23- ucc_aint_t * s_disp = TASK_ARGS (task ).src .info_v .displacements ;
24- ucc_aint_t * d_disp = TASK_ARGS (task ).dst .info_v .displacements ;
25- size_t sdt_size = ucc_dt_size (TASK_ARGS (task ).src .info_v .datatype );
26- size_t rdt_size = ucc_dt_size (TASK_ARGS (task ).dst .info_v .datatype );
27- ucc_mem_map_mem_h src_memh = TASK_ARGS (task ).src_memh .local_memh ;
28- ucc_mem_map_mem_h * dst_memh = TASK_ARGS (task ).dst_memh .global_memh ;
29- ucc_rank_t peer ;
30- size_t sd_disp , dd_disp , data_size ;
16+ ucc_tl_ucp_task_t * task = ucc_derived_of (ctask , ucc_tl_ucp_task_t );
17+ ucc_tl_ucp_team_t * team = TASK_TEAM (task );
18+ ptrdiff_t src = (ptrdiff_t )TASK_ARGS (task ).src .info_v .buffer ;
19+ ptrdiff_t dest = (ptrdiff_t )TASK_ARGS (task ).dst .info_v .buffer ;
20+ ucc_rank_t grank = UCC_TL_TEAM_RANK (team );
21+ ucc_rank_t gsize = UCC_TL_TEAM_SIZE (team );
22+ long * pSync = TASK_ARGS (task ).global_work_buffer ;
23+ ucc_aint_t * s_disp = TASK_ARGS (task ).src .info_v .displacements ;
24+ ucc_aint_t * d_disp = TASK_ARGS (task ).dst .info_v .displacements ;
25+ size_t sdt_size = ucc_dt_size (TASK_ARGS (task ).src .info_v .datatype );
26+ size_t rdt_size = ucc_dt_size (TASK_ARGS (task ).dst .info_v .datatype );
27+ ucc_mem_map_mem_h src_memh = TASK_ARGS (task ).src_memh .local_memh ;
28+ ucc_mem_map_mem_h * dst_memh = TASK_ARGS (task ).dst_memh .global_memh ;
29+ //ucc_mem_map_memh_t *src_memh_g = NULL;
30+ //ucc_mem_map_memh_t *dst_memh_g = NULL;
31+ ucc_rank_t peer ;
32+ ucc_status_t status ;
33+ size_t sd_disp , dd_disp , data_size ;
3134
3235 ucc_tl_ucp_task_reset (task , UCC_INPROGRESS );
36+ status = ucc_tl_ucp_coll_dynamic_segment_exchange (task );
37+ if (UCC_OK != status ) {
38+ task -> super .status = status ;
39+ goto out ;
40+ }
41+
42+ if (TASK_ARGS (task ).flags & UCC_COLL_ARGS_FLAG_SRC_MEMH_GLOBAL ) {
43+ src_memh = TASK_ARGS (task ).src_memh .global_memh [grank ];
44+ }
3345
3446 /* perform a put to each member peer using the peer's index in the
3547 * destination displacement. */
@@ -42,18 +54,16 @@ ucc_status_t ucc_tl_ucp_alltoallv_onesided_start(ucc_coll_task_t *ctask)
4254 ucc_coll_args_get_displacement (& TASK_ARGS (task ), d_disp , peer ) *
4355 rdt_size ;
4456 data_size =
45- ucc_coll_args_get_count (
46- & TASK_ARGS ( task ), TASK_ARGS (task ).src .info_v .counts , peer ) *
57+ ucc_coll_args_get_count (& TASK_ARGS ( task ),
58+ TASK_ARGS (task ).src .info_v .counts , peer ) *
4759 sdt_size ;
4860
4961 UCPCHECK_GOTO (ucc_tl_ucp_put_nb (PTR_OFFSET (src , sd_disp ),
50- PTR_OFFSET (dest , dd_disp ),
51- data_size , peer , src_memh ,
52- dst_memh , team , task ),
53- task , out );
54- UCPCHECK_GOTO (ucc_tl_ucp_atomic_inc (pSync , peer ,
55- dst_memh , team ),
62+ PTR_OFFSET (dest , dd_disp ), data_size ,
63+ peer , src_memh , dst_memh , team , task ),
5664 task , out );
65+ UCPCHECK_GOTO (ucc_tl_ucp_atomic_inc (pSync , peer , dst_memh , team ), task ,
66+ out );
5767 }
5868 return ucc_progress_queue_enqueue (UCC_TL_CORE_CTX (team )-> pq , & task -> super );
5969out :
@@ -73,15 +83,16 @@ void ucc_tl_ucp_alltoallv_onesided_progress(ucc_coll_task_t *ctask)
7383
7484 pSync [0 ] = 0 ;
7585 task -> super .status = UCC_OK ;
86+ ucc_tl_ucp_coll_dynamic_segment_finalize (task );
7687}
7788
7889ucc_status_t ucc_tl_ucp_alltoallv_onesided_init (ucc_base_coll_args_t * coll_args ,
7990 ucc_base_team_t * team ,
8091 ucc_coll_task_t * * task_h )
8192{
82- ucc_tl_ucp_team_t * tl_team = ucc_derived_of (team , ucc_tl_ucp_team_t );
83- ucc_tl_ucp_task_t * task ;
84- ucc_status_t status ;
93+ ucc_tl_ucp_team_t * tl_team = ucc_derived_of (team , ucc_tl_ucp_team_t );
94+ ucc_status_t status = UCC_OK ;
95+ ucc_tl_ucp_task_t * task ;
8596
8697 ALLTOALLV_TASK_CHECK (coll_args -> args , tl_team );
8798 if (!(coll_args -> args .mask & UCC_COLL_ARGS_FIELD_GLOBAL_WORK_BUFFER )) {
@@ -90,14 +101,6 @@ ucc_status_t ucc_tl_ucp_alltoallv_onesided_init(ucc_base_coll_args_t *coll_args,
90101 status = UCC_ERR_NOT_SUPPORTED ;
91102 goto out ;
92103 }
93- if (coll_args -> args .mask & UCC_COLL_ARGS_FIELD_FLAGS ) {
94- if (!(coll_args -> args .flags & UCC_COLL_ARGS_FLAG_MEM_MAPPED_BUFFERS )) {
95- tl_error (UCC_TL_TEAM_LIB (tl_team ),
96- "non memory mapped buffers are not supported" );
97- status = UCC_ERR_NOT_SUPPORTED ;
98- goto out ;
99- }
100- }
101104 if (!(coll_args -> args .mask & UCC_COLL_ARGS_FIELD_MEM_MAP_SRC_MEMH )) {
102105 coll_args -> args .src_memh .global_memh = NULL ;
103106 }
@@ -109,7 +112,14 @@ ucc_status_t ucc_tl_ucp_alltoallv_onesided_init(ucc_base_coll_args_t *coll_args,
109112 * task_h = & task -> super ;
110113 task -> super .post = ucc_tl_ucp_alltoallv_onesided_start ;
111114 task -> super .progress = ucc_tl_ucp_alltoallv_onesided_progress ;
112- status = UCC_OK ;
115+
116+ status = ucc_tl_ucp_coll_dynamic_segment_init (
117+ & coll_args -> args , task );
118+ if (UCC_OK != status ) {
119+ tl_error (UCC_TL_TEAM_LIB (tl_team ),
120+ "failed to initialize dynamic segments" );
121+ goto out ;
122+ }
113123out :
114124 return status ;
115125}
0 commit comments