|
| 1 | +#ifndef FSLINALG_BASIC_LINALG_GENERAL_MATRIX_MATRIX_PRODUCT_IMPL_HPP |
| 2 | +#define FSLINALG_BASIC_LINALG_GENERAL_MATRIX_MATRIX_PRODUCT_IMPL_HPP |
| 3 | + |
| 4 | +#include <FSLinalg/BasicLinalg/GeneralMatrixMatrixProduct.hpp> |
| 5 | +#include <FSLinalg/BasicLinalg/TripleProduct.hpp> |
| 6 | +#include <FSLinalg/BasicLinalg/Product.hpp> |
| 7 | + |
| 8 | +namespace FSLinalg |
| 9 | +{ |
| 10 | +namespace BasicLinalg |
| 11 | +{ |
| 12 | + |
| 13 | +template<bool transposeA, bool conjugateA, unsigned int nRowsA, unsigned int nColsA, bool transposeB, bool conjugateB, unsigned int nRowsB, unsigned int nColsB, bool incrDst> |
| 14 | +template<Scalar_concept ScalarAlpha, Scalar_concept ScalarA, Scalar_concept ScalarB, Scalar_concept ScalarY> |
| 15 | +void GeneralMatrixMatrixProduct<transposeA,conjugateA,nRowsA,nColsA,transposeB,conjugateB,nRowsB,nColsB,incrDst>::run( |
| 16 | + const ScalarAlpha& alpha, |
| 17 | + const Matrix<ScalarA,nRowsA,nColsA>& A, |
| 18 | + const Matrix<ScalarB,nRowsB,nColsB>& B, |
| 19 | + Matrix<ScalarY,nRowsY,nColsY>& Y) |
| 20 | +{ |
| 21 | + constexpr Size A_iStride = (not transposeA) ? nColsA : 1; |
| 22 | + constexpr Size A_kStride = (not transposeA) ? 1 : nColsA; |
| 23 | + constexpr Size B_kStride = (not transposeB) ? nColsB : 1; |
| 24 | + constexpr Size B_jStride = (not transposeB) ? 1 : nColsB; |
| 25 | + |
| 26 | + constexpr TripleProduct<false, conjugateA, conjugateB> prod; |
| 27 | + |
| 28 | + if constexpr (not incrDst) |
| 29 | + { |
| 30 | + for (Size i=0; i!=nRowsY*nColsY; ++i) { Y[i] = 0; } |
| 31 | + } |
| 32 | + |
| 33 | + for (Size i=0; i!=nRowsY; ++i) |
| 34 | + { |
| 35 | + for (Size k=0; k !=nColsOpA; ++k) |
| 36 | + { |
| 37 | + for (Size j=0; j!=nColsY; ++j) |
| 38 | + { |
| 39 | + Y(i,j) += prod(alpha, A[i*A_iStride + k*A_kStride], B[k*B_kStride + j*B_jStride]); |
| 40 | + } |
| 41 | + } |
| 42 | + } |
| 43 | +} |
| 44 | + |
| 45 | +template<bool transposeA, bool conjugateA, unsigned int nRowsA, unsigned int nColsA, bool transposeB, bool conjugateB, unsigned int nRowsB, unsigned int nColsB, bool incrDst> |
| 46 | +template<Scalar_concept ScalarAlpha, Scalar_concept ScalarA, Scalar_concept ScalarY> |
| 47 | +void GeneralMatrixMatrixProduct<transposeA,conjugateA,nRowsA,nColsA,transposeB,conjugateB,nRowsB,nColsB,incrDst>::run( |
| 48 | + const ScalarAlpha& alpha, |
| 49 | + const Matrix<ScalarA,nRowsA,nColsA>& A, |
| 50 | + const UnitMatrix<nRowsB,nColsB>& B, |
| 51 | + Matrix<ScalarY,nRowsY,nColsY>& Y) |
| 52 | +{ |
| 53 | + constexpr Size A_iStride = (not transposeA) ? nColsA : 1; |
| 54 | + constexpr Size A_kStride = (not transposeA) ? 1 : nColsA; |
| 55 | + |
| 56 | + constexpr Product<false, conjugateA> prod; |
| 57 | + |
| 58 | + if constexpr (not incrDst) |
| 59 | + { |
| 60 | + for (Size i=0; i!=nRowsY*nColsY; ++i) { Y[i] = 0; } |
| 61 | + } |
| 62 | + |
| 63 | + const Size k = (not transposeB) ? B.getId().i : B.getId().j; |
| 64 | + const Size j = (not transposeB) ? B.getId().j : B.getId().i; |
| 65 | + |
| 66 | + for (Size i=0; i!=nRowsY; ++i) |
| 67 | + { |
| 68 | + Y(i,j) += prod(alpha, A[i*A_iStride + k*A_kStride]); |
| 69 | + } |
| 70 | +} |
| 71 | + |
| 72 | +template<bool transposeA, bool conjugateA, unsigned int nRowsA, unsigned int nColsA, bool transposeB, bool conjugateB, unsigned int nRowsB, unsigned int nColsB, bool incrDst> |
| 73 | +template<Scalar_concept ScalarAlpha, Scalar_concept ScalarB, Scalar_concept ScalarY> |
| 74 | +void GeneralMatrixMatrixProduct<transposeA,conjugateA,nRowsA,nColsA,transposeB,conjugateB,nRowsB,nColsB,incrDst>::run( |
| 75 | + const ScalarAlpha& alpha, |
| 76 | + const UnitMatrix<nRowsA,nColsA>& A, |
| 77 | + const Matrix<ScalarB,nRowsB,nColsB>& B, |
| 78 | + Matrix<ScalarY,nRowsY,nColsY>& Y) |
| 79 | +{ |
| 80 | + constexpr Size B_kStride = (not transposeB) ? nColsB : 1; |
| 81 | + constexpr Size B_jStride = (not transposeB) ? 1 : nColsB; |
| 82 | + |
| 83 | + constexpr Product<false, conjugateB> prod; |
| 84 | + |
| 85 | + if constexpr (not incrDst) |
| 86 | + { |
| 87 | + for (Size i=0; i!=nRowsY*nColsY; ++i) { Y[i] = 0; } |
| 88 | + } |
| 89 | + |
| 90 | + const Size i = (not transposeA) ? A.getId().i : A.getId().j; |
| 91 | + const Size k = (not transposeA) ? A.getId().j : A.getId().i; |
| 92 | + |
| 93 | + |
| 94 | + for (Size j=0; j!=nColsY; ++j) |
| 95 | + { |
| 96 | + Y(i,j) += prod(alpha, B[k*B_kStride + j*B_jStride]); |
| 97 | + } |
| 98 | +} |
| 99 | + |
| 100 | +template<bool transposeA, bool conjugateA, unsigned int nRowsA, unsigned int nColsA, bool transposeB, bool conjugateB, unsigned int nRowsB, unsigned int nColsB, bool incrDst> |
| 101 | +template<Scalar_concept ScalarAlpha, Scalar_concept ScalarY> |
| 102 | +void GeneralMatrixMatrixProduct<transposeA,conjugateA,nRowsA,nColsA,transposeB,conjugateB,nRowsB,nColsB,incrDst>::run( |
| 103 | + const ScalarAlpha& alpha, |
| 104 | + const UnitMatrix<nRowsA,nColsA>& A, |
| 105 | + const UnitMatrix<nRowsB,nColsB>& B, |
| 106 | + Matrix<ScalarY,nRowsY,nColsY>& Y) |
| 107 | +{ |
| 108 | + if constexpr (not incrDst) |
| 109 | + { |
| 110 | + for (Size i=0; i!=nRowsY*nColsY; ++i) { Y[i] = 0; } |
| 111 | + } |
| 112 | + |
| 113 | + const Size i = (not transposeA) ? A.getId().i : A.getId().j; |
| 114 | + const Size j = (not transposeB) ? B.getId().j : B.getId().i; |
| 115 | + const Size k1 = (not transposeA) ? A.getId().j : A.getId().i; |
| 116 | + const Size k2 = (not transposeB) ? B.getId().i : B.getId().j; |
| 117 | + |
| 118 | + Y(i,j) += alpha*(k1 == k2); |
| 119 | +} |
| 120 | + |
| 121 | +} // namespace BasicLinalg |
| 122 | +} // namespace FSLinalg |
| 123 | + |
| 124 | +#endif // FSLINALG_BASIC_LINALG_GENERAL_MATRIX_MATRIX_PRODUCT_IMPL_HPP |
0 commit comments