@@ -25,26 +25,22 @@ unsigned __attribute__((noinline)) static_schedule(void) {
2525#pragma omp parallel firstprivate(data_a, data_x, data_y)
2626 {
2727 int nthreads = omp_get_num_threads ();
28- // DM, rep, bound, stride, data
29- __builtin_ssr_setup_1d_r (
30- 0 , 0 , AXPY_N / nthreads - 1 , sizeof (double ),
31- & data_x [AXPY_N / nthreads * omp_get_thread_num ()]);
32- __builtin_ssr_setup_1d_r (
33- 1 , 0 , AXPY_N / nthreads - 1 , sizeof (double ),
34- & data_y [AXPY_N / nthreads * omp_get_thread_num ()]);
35- __builtin_ssr_setup_1d_w (
36- 2 , 0 , AXPY_N / nthreads - 1 , sizeof (double ),
37- & data_y [AXPY_N / nthreads * omp_get_thread_num ()]);
38- __builtin_ssr_enable ();
28+ snrt_ssr_loop_1d (SNRT_SSR_DM0 , AXPY_N / nthreads , sizeof (double ));
29+ snrt_ssr_read (SNRT_SSR_DM0 , SNRT_SSR_1D , & data_x [AXPY_N / nthreads * omp_get_thread_num ()]);
30+ snrt_ssr_loop_1d (SNRT_SSR_DM1 , AXPY_N / nthreads , sizeof (double ));
31+ snrt_ssr_read (SNRT_SSR_DM1 , SNRT_SSR_1D , & data_y [AXPY_N / nthreads * omp_get_thread_num ()]);
32+ snrt_ssr_loop_1d (SNRT_SSR_DM2 , AXPY_N / nthreads , sizeof (double ));
33+ snrt_ssr_write (SNRT_SSR_DM2 , SNRT_SSR_1D , & data_y [AXPY_N / nthreads * omp_get_thread_num ()]);
34+ snrt_ssr_enable ();
3935#pragma omp for schedule(static)
4036 for (unsigned i = 0 ; i < AXPY_N ; i ++ ) {
41- // data_y[i] = data_a * data_x[i] + data_y[i];
42- // data_y[i] = data_a * __builtin_ssr_pop(0) +
43- __builtin_ssr_pop ( 1 );
44- __builtin_ssr_push (
45- 2 , data_a * __builtin_ssr_pop ( 0 ) + __builtin_ssr_pop ( 1 ) );
37+ asm volatile (
38+ "fmadd.d ft2, %[a], ft0, ft1\n"
39+ :
40+ : [ a ] "f" ( data_a )
41+ : "ft0" , "ft1" , "ft2" , "memory" );
4642 }
47- __builtin_ssr_disable ();
43+ snrt_ssr_disable ();
4844 }
4945
5046 // check data
0 commit comments