@@ -229,7 +229,7 @@ lu(__global DEVICE_DATA_TYPE* restrict a,
229229 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
230230 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
231231 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
232- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
232+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
233233 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
234234 a_buffer [i ][j ][ii ][jj ] = a [block_col * BLOCK_SIZE + (block_row * BLOCK_SIZE + i * GEMM_BLOCK + ii ) * BLOCK_SIZE * blocks_per_row + j * GEMM_BLOCK + jj ];
235235 }
@@ -411,7 +411,7 @@ lu(__global DEVICE_DATA_TYPE* restrict a,
411411 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
412412 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
413413 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
414- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
414+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
415415 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
416416 a [block_col * BLOCK_SIZE + (block_row * BLOCK_SIZE + i * GEMM_BLOCK + ii ) * BLOCK_SIZE * blocks_per_row + j * GEMM_BLOCK + jj ] = a_buffer [i ][j ][ii ][jj ];
417417 }
@@ -423,7 +423,7 @@ lu(__global DEVICE_DATA_TYPE* restrict a,
423423 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
424424 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
425425 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
426- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
426+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
427427 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
428428 a_block_trans [(i * GEMM_BLOCK + ii ) * BLOCK_SIZE + j * GEMM_BLOCK + jj ] = a_buffer [j ][i ][jj ][ii ];
429429 }
@@ -434,7 +434,7 @@ lu(__global DEVICE_DATA_TYPE* restrict a,
434434 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
435435 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
436436 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
437- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
437+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
438438 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
439439 a_block [(i * GEMM_BLOCK + ii ) * BLOCK_SIZE + j * GEMM_BLOCK + jj ] = a_buffer [i ][j ][ii ][jj ];
440440 }
@@ -466,7 +466,7 @@ void top_update(__global DEVICE_DATA_TYPE* restrict a,
466466 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
467467 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
468468 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
469- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
469+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
470470 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
471471 a_buffer [i ][j ][ii ][jj ] = a [block_col * BLOCK_SIZE + (block_row * BLOCK_SIZE + i * GEMM_BLOCK + ii ) * BLOCK_SIZE * blocks_per_row + j * GEMM_BLOCK + jj ];
472472 }
@@ -558,7 +558,7 @@ void top_update(__global DEVICE_DATA_TYPE* restrict a,
558558 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
559559 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
560560 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
561- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
561+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
562562 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
563563 a [block_col * BLOCK_SIZE + (block_row * BLOCK_SIZE + i * GEMM_BLOCK + ii ) * BLOCK_SIZE * blocks_per_row + j * GEMM_BLOCK + jj ] = a_buffer [i ][j ][ii ][jj ];
564564 }
@@ -570,7 +570,7 @@ void top_update(__global DEVICE_DATA_TYPE* restrict a,
570570 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
571571 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
572572 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
573- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
573+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
574574 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
575575 top_block [(i * GEMM_BLOCK + ii ) * BLOCK_SIZE + j * GEMM_BLOCK + jj ] = a_buffer [i ][j ][ii ][jj ];
576576 }
@@ -601,7 +601,7 @@ void left_update(__global DEVICE_DATA_TYPE* restrict a,
601601 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
602602 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
603603 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
604- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
604+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
605605 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
606606 a_buffer [i ][j ][ii ][jj ] = a [block_col * BLOCK_SIZE + (block_row * BLOCK_SIZE + i * GEMM_BLOCK + ii ) * BLOCK_SIZE * blocks_per_row + j * GEMM_BLOCK + jj ];
607607 }
@@ -684,7 +684,7 @@ void left_update(__global DEVICE_DATA_TYPE* restrict a,
684684 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
685685 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
686686 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
687- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
687+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
688688 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
689689 a [block_col * BLOCK_SIZE + (block_row * BLOCK_SIZE + i * GEMM_BLOCK + ii ) * BLOCK_SIZE * blocks_per_row + j * GEMM_BLOCK + jj ] = a_buffer [i ][j ][ii ][jj ];
690690 }
@@ -697,7 +697,7 @@ void left_update(__global DEVICE_DATA_TYPE* restrict a,
697697 for (int i = 0 ; i < BLOCK_SIZE /GEMM_BLOCK ; i ++ ) {
698698 for (int ii = 0 ; ii < GEMM_BLOCK ; ii ++ ) {
699699 for (int j = 0 ; j < BLOCK_SIZE /GEMM_BLOCK ; j ++ ) {
700- __attribute__((opencl_unroll_hint (GLOBAL_MEM_UNROLL )))
700+ __attribute__((opencl_unroll_hint (GEMM_BLOCK )))
701701 for (int jj = 0 ; jj < GEMM_BLOCK ; jj ++ ) {
702702 left_block [(i * GEMM_BLOCK + ii ) * BLOCK_SIZE + j * GEMM_BLOCK + jj ] = a_buffer [j ][i ][jj ][ii ];
703703 }
0 commit comments