11#ifndef TLAPACK_BLAS_GEMM_HH
12#define TLAPACK_BLAS_GEMM_HH
93 for (idx_t j = 0; j < n; ++j) {
94 for (idx_t i = 0; i < m; ++i)
96 for (idx_t
l = 0;
l <
k; ++
l) {
98 for (idx_t i = 0; i < m; ++i)
104 for (idx_t j = 0; j < n; ++j) {
105 for (idx_t i = 0; i < m; ++i)
107 for (idx_t
l = 0;
l <
k; ++
l) {
109 for (idx_t i = 0; i < m; ++i)
115 for (idx_t j = 0; j < n; ++j) {
116 for (idx_t i = 0; i < m; ++i)
118 for (idx_t
l = 0;
l <
k; ++
l) {
120 for (idx_t i = 0; i < m; ++i)
130 for (idx_t j = 0; j < n; ++j) {
131 for (idx_t i = 0; i < m; ++i) {
133 for (idx_t
l = 0;
l <
k; ++
l)
140 for (idx_t j = 0; j < n; ++j) {
141 for (idx_t i = 0; i < m; ++i) {
143 for (idx_t
l = 0;
l <
k; ++
l)
150 for (idx_t j = 0; j < n; ++j) {
151 for (idx_t i = 0; i < m; ++i) {
153 for (idx_t
l = 0;
l <
k; ++
l)
165 for (idx_t j = 0; j < n; ++j) {
166 for (idx_t i = 0; i < m; ++i) {
168 for (idx_t
l = 0;
l <
k; ++
l)
175 for (idx_t j = 0; j < n; ++j) {
176 for (idx_t i = 0; i < m; ++i) {
178 for (idx_t
l = 0;
l <
k; ++
l)
185 for (idx_t j = 0; j < n; ++j) {
186 for (idx_t i = 0; i < m; ++i) {
188 for (idx_t
l = 0;
l <
k; ++
l)
197#ifdef TLAPACK_USE_LAPACKPP
235 auto A_ = legacy_matrix(
A);
236 auto B_ = legacy_matrix(
B);
237 auto C_ = legacy_matrix(
C);
241 const auto& m =
C_.m;
242 const auto& n =
C_.n;
248 -3,
"Infs and NaNs in A or B will not propagate to C on output");
252 "Infs and NaNs in C on input will not propagate to C on output");
254 return ::blas::gemm((::blas::Layout)L, (::blas::Op)
transA,
261#if defined(TLAPACK_USE_BF16BF16FP32_GEMM) && __has_include(<stdfloat>) && __cplusplus > 202002L
263 #include <mkl_cblas.h>
281template <
class idx_t, Layout L>
291 auto A_ = legacy_matrix(
A);
292 auto B_ = legacy_matrix(
B);
293 auto C_ = legacy_matrix(
C);
298 const auto& m =
C_.m;
299 const auto& n =
C_.n;
#define TLAPACK_SCALAR
Macro for tlapack::concepts::Scalar compatible with C++17.
Definition concepts.hpp:915
#define TLAPACK_LEGACY_MATRIX
Macro for tlapack::concepts::LegacyMatrix compatible with C++17.
Definition concepts.hpp:951
#define TLAPACK_MATRIX
Macro for tlapack::concepts::Matrix compatible with C++17.
Definition concepts.hpp:896
void gemm(Op transA, Op transB, const alpha_t &alpha, const matrixA_t &A, const matrixB_t &B, const beta_t &beta, matrixC_t &C)
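General matrix-matrix multiply: $C := \alpha \, op(A) \times op(B) + \beta C$, where $op(X)$ is $X$, $X^T$ or $X^H$ as selected by transA / transB.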
Definition gemm.hpp:61
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113
#define tlapack_warning(info, detailedInfo)
Warning handler.
Definition exceptionHandling.hpp:156
Concept for types that represent tlapack::Op.
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D').
Definition arrayTraits.hpp:15
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
constexpr T conj(const T &x) noexcept
Extends std::conj() to real datatypes.
Definition utils.hpp:100
Op
Definition types.hpp:227
@ ConjTrans
conjugate transpose
constexpr Layout layout
Layout of a matrix or vector.
Definition arrayTraits.hpp:232
Layout
Definition types.hpp:29
@ ColMajor
Column-major layout.
Strong zero type.
Definition StrongZero.hpp:43