@@ -2056,11 +2056,11 @@ class joint_matrix {
20562056 const size_t num_elements;
20572057};
20582058
2059- // / Loads 1 8x8 b16 matrix from shared memory to local memory (32-bits per wi)
2059+ // / Loads 1 8x8 b16 matrix from local memory to private memory (32-bits per wi)
20602060// / Requires the sub-group size of kernel calling this function to be 32
20612061// / \tparam [in] T The type of result variable
2062- // / \param [in] addr The address of the matrix in shared memory
2063- // / \param [in] m The local memory to store the matrix
2062+ // / \param [in] addr The address of the matrix in local memory
2063+ // / \param [in] m The private memory to store the matrix
20642064// / \param [in] item The sycl::nd_item index space class
20652065// / \param [in] trans Indicates whether the matrix is to be loaded transposed
20662066// / \param [in] mat The matrix index to be loaded
@@ -2112,12 +2112,12 @@ void ldmatrix(uintptr_t addr, T *m, const ItemT &item, bool trans = false,
21122112 }
21132113}
21142114
2115- // / Loads 2 8x8 b16 matrix from shared memory to local memory (32-bits per wi)
2115+ // / Loads 2 8x8 b16 matrices from local memory to private memory (32-bits per wi)
21162116// / Requires the sub-group size of kernel calling this function to be 32
21172117// / \tparam [in] T The type of result variable
2118- // / \param [in] addr The address of the matrix in shared memory
2119- // / \param [in] m1 The local memory to store data of 1st matrix
2120- // / \param [in] m2 The local memory to store data of 2nd matrix
2118+ // / \param [in] addr The address of the matrix in local memory
2119+ // / \param [in] m1 The private memory to store data of 1st matrix
2120+ // / \param [in] m2 The private memory to store data of 2nd matrix
21212121// / \param [in] item The sycl::nd_item index space class
21222122// / \param [in] trans Indicates whether the matrices are to be loaded transposed
21232123template <typename T, typename ItemT>
@@ -2129,14 +2129,14 @@ void ldmatrix(uintptr_t addr, T *m1, T *m2, const ItemT &item,
21292129 ldmatrix (addr, m2, item, trans, 1 );
21302130}
21312131
2132- // / Loads 4 8x8 b16 matrix from shared memory to local memory (32-bits per wi)
2132+ // / Loads 4 8x8 b16 matrices from local memory to private memory (32-bits per wi)
21332133// / Requires the sub-group size of kernel calling this function to be 32
21342134// / \tparam [in] T The type of result variable
2135- // / \param [in] addr The address of the matrix in shared memory
2136- // / \param [in] m1 The local memory to store data of 1st matrix
2137- // / \param [in] m2 The local memory to store data of 2nd matrix
2138- // / \param [in] m3 The local memory to store data of 3rd matrix
2139- // / \param [in] m4 The local memory to store data of 4th matrix
2135+ // / \param [in] addr The address of the matrix in local memory
2136+ // / \param [in] m1 The private memory to store data of 1st matrix
2137+ // / \param [in] m2 The private memory to store data of 2nd matrix
2138+ // / \param [in] m3 The private memory to store data of 3rd matrix
2139+ // / \param [in] m4 The private memory to store data of 4th matrix
21402140// / \param [in] item The sycl::nd_item index space class
21412141// / \param [in] trans Indicates whether the matrices are to be loaded transposed
21422142template <typename T, typename ItemT>
0 commit comments