10#ifndef TLAPACK_TRMM_BLOCKED_MIXED_HH
11#define TLAPACK_TRMM_BLOCKED_MIXED_HH
91 using idx_t = size_type<matrixA_t>;
92 using range = std::pair<idx_t, idx_t>;
95 const idx_t m = nrows(B);
96 const idx_t n = ncols(B);
97 const idx_t nb = min(opts.nb, m);
103 trans != Op::ConjTrans);
109 auto [W, work1] = reshape(work, nb, n);
111 using real_t = real_type<type_t<matrixB_t>>;
112 if (side == Side::Left) {
113 if (trans == Op::NoTrans) {
114 if (uplo == Uplo::Upper) {
115 for (idx_t i = 0; i < m; i += nb) {
116 const idx_t ib = min(nb, m - i);
119 slice(A, range(0, i), range(i, i + ib));
121 slice(A, range(i, i + ib), range(i, i + ib));
123 auto B0 = rows(B, range(0, i));
124 auto Bi = rows(B, range(i, i + ib));
125 auto BiLowPrecision = rows(W, range(0, ib));
128 lacpy(GENERAL, Bi, BiLowPrecision);
129 gemm(NO_TRANS, NO_TRANS, alpha, A0i, BiLowPrecision,
133 trmm(side, uplo, trans, diag, alpha, Aii, Bi);
140 else if (trans == Op::Trans) {
141 if (uplo == Uplo::Upper) {
149 if (uplo == Uplo::Upper) {
158 if (trans == Op::NoTrans) {
159 if (uplo == Uplo::Upper) {
166 else if (trans == Op::Trans) {
167 if (uplo == Uplo::Upper) {
175 if (uplo == Uplo::Upper) {
#define TLAPACK_DIAG
Macro for tlapack::concepts::Diag compatible with C++17.
Definition concepts.hpp:945
#define TLAPACK_SIDE
Macro for tlapack::concepts::Side compatible with C++17.
Definition concepts.hpp:927
#define TLAPACK_UPLO
Macro for tlapack::concepts::Uplo compatible with C++17.
Definition concepts.hpp:942
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
#define TLAPACK_OP
Macro for tlapack::concepts::Op compatible with C++17.
Definition concepts.hpp:933
#define TLAPACK_WORKSPACE
Macro for tlapack::concepts::Workspace compatible with C++17.
Definition concepts.hpp:912
void lacpy(uplo_t uplo, const matrixA_t &A, matrixB_t &B)
Copies a matrix from A to B.
Definition lacpy.hpp:38
void trmm(Side side, Uplo uplo, Op trans, Diag diag, const alpha_t &alpha, const matrixA_t &A, matrixB_t &B)
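Triangular matrix-matrix multiply: $B := \alpha \, \mathrm{op}(A) \, B$ (side = Left) or $B := \alpha \, B \, \mathrm{op}(A)$ (side = Right), where $A$ is a triangular matrix and $\mathrm{op}(A)$ is $A$, $A^T$ or $A^H$ as selected by trans.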
Definition trmm.hpp:72
void gemm(Op transA, Op transB, const alpha_t &alpha, const matrixA_t &A, const matrixB_t &B, const beta_t &beta, matrixC_t &C)
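General matrix-matrix multiply: $C := \alpha \, \mathrm{op}(A) \, \mathrm{op}(B) + \beta \, C$, where $\mathrm{op}(X)$ is $X$, $X^T$ or $X^H$ as selected by transA and transB.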
Definition gemm.hpp:61
void trmm_blocked_mixed(side_t side, uplo_t uplo, op_t trans, diag_t diag, const scalar_type< type_t< matrixA_t >, type_t< matrixB_t > > &alpha, const matrixA_t &A, matrixB_t &B, work_t &work, const TrmmBlockedOpts &opts={})
Triangular matrix-matrix multiply using a blocked algorithm.
Definition trmm_blocked_mixed.hpp:78
#define tlapack_error(info, detailedInfo)
Error handler.
Definition exceptionHandling.hpp:142
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
typename traits::scalar_type_traits< Types..., int >::type scalar_type
The common scalar type of the list of types.
Definition scalar_type_traits.hpp:250
Options struct for trmm_blocked_mixed.
Definition trmm_blocked_mixed.hpp:23
size_t nb
Block size.
Definition trmm_blocked_mixed.hpp:24