12#ifndef TLAPACK_GEHRD_HH
13#define TLAPACK_GEHRD_HH
51template <
class T, TLAPACK_SMATRIX matrix_t, TLAPACK_SVECTOR vector_t>
58 using idx_t = size_type<matrix_t>;
59 using work_t = matrix_type<matrix_t, vector_t>;
60 using range = pair<idx_t, idx_t>;
62 const idx_t n = ncols(A);
63 const idx_t nb = (ilo < ihi) ? min<idx_t>(opts.nb, ihi - ilo - 1) : 0;
64 const idx_t nx = max<idx_t>(nb, opts.nx_switch);
67 if constexpr (is_same_v<T, type_t<work_t>>) {
69 if ((ilo < ihi) && (nx < ihi - ilo - 1)) {
70 workinfo = WorkInfo(n + nb, nb);
72 auto&& V = slice(A, range{ilo + 1, ihi}, range{ilo, ilo + nb});
73 auto&& T_s = slice(A, range{0, nb}, range{0, nb});
74 auto&& A5 = slice(A, range{ilo + 1, ihi}, range{ilo + nb, n});
76 workinfo.minMax(larfb_worksize<T>(LEFT_SIDE, CONJ_TRANS,
77 FORWARD, COLUMNWISE_STORAGE,
81 workinfo.minMax(gehd2_worksize<T>(ilo, ihi, A, tau));
106 using idx_t = size_type<matrix_t>;
107 using T = type_t<work_t>;
108 using range = pair<idx_t, idx_t>;
109 using TA = type_t<matrix_t>;
110 using real_t = real_type<TA>;
115 const idx_t n = ncols(A);
118 const idx_t nb = (ilo < ihi) ? min<idx_t>(opts.nb, ihi - ilo - 1) : 0;
120 const idx_t nx = max(nb, (idx_t)opts.nx_switch);
129 if (n <= 0)
return 0;
132 auto [Y, work2] = ((ilo < ihi) && (nx < ihi - ilo - 1))
133 ? reshape(work, n, nb)
134 : reshape(work, 0, 0);
135 auto Yt = transpose_view(Y);
136 auto [matrixT, work3] = ((ilo < ihi) && (nx < ihi - ilo - 1))
137 ? reshape(work2, nb, nb)
138 : reshape(work2, 0, 0);
139 laset(GENERAL, zero, zero, Y);
142 for (; i + nx < ihi - 1; i = i + nb) {
143 const idx_t nb2 = min(nb, ihi - i - 1);
145 auto V = slice(A, range{i + 1, ihi}, range{i, i + nb2});
146 auto A2 = slice(A, range{0, ihi}, range{i, ihi});
147 auto tau2 = slice(tau, range{i, ihi});
148 auto T_s = slice(matrixT, range{0, nb2}, range{0, nb2});
149 auto Y_s = slice(Y, range{0, n}, range{0, nb2});
150 lahr2(i, nb2, A2, tau2, T_s, Y_s);
153 auto V2 = slice(V, range{nb2 - 1, ihi - i - 1}, range{0, nb2});
158 const TA ei = V(nb2 - 1, nb2 - 1);
159 V(nb2 - 1, nb2 - 1) = one;
160 auto A3 = slice(A, range{0, ihi}, range{i + nb2, ihi});
161 auto Y_2 = slice(Y, range{0, ihi}, range{0, nb2});
162 gemm(NO_TRANS, CONJ_TRANS, -one, Y_2, V2, one, A3);
163 V(nb2 - 1, nb2 - 1) = ei;
166 auto V1 = slice(A, range{i + 1, i + nb2 + 1}, range{i, i + nb2});
167 trmm(RIGHT_SIDE, LOWER_TRIANGLE, CONJ_TRANS, UNIT_DIAG, one, V1, Y_s);
168 for (idx_t j = 0; j < nb2 - 1; ++j) {
169 auto A4 = slice(A, range{0, i + 1}, i + j + 1);
170 axpy(-one, slice(Y, range{0, i + 1}, j), A4);
174 auto A5 = slice(A, range{i + 1, ihi}, range{i + nb2, n});
175 larfb_work(LEFT_SIDE, CONJ_TRANS, FORWARD, COLUMNWISE_STORAGE, V, T_s,
221template <TLAPACK_SMATRIX matrix_t, TLAPACK_SVECTOR vector_t>
228 using idx_t = size_type<matrix_t>;
229 using work_t = matrix_type<matrix_t, vector_t>;
230 using T = type_t<work_t>;
233 Create<work_t> new_matrix;
236 const idx_t n = ncols(A);
239 if (n <= 0)
return 0;
242 WorkInfo workinfo = gehrd_worksize<T>(ilo, ihi, A, tau, opts);
243 std::vector<T> work_;
244 auto work = new_matrix(work_, workinfo.m, workinfo.n);
246 return gehrd_work(ilo, ihi, A, tau, work, opts);
#define TLAPACK_SVECTOR
Macro for tlapack::concepts::SliceableVector compatible with C++17.
Definition concepts.hpp:909
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
#define TLAPACK_WORKSPACE
Macro for tlapack::concepts::Workspace compatible with C++17.
Definition concepts.hpp:912
int gehrd(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, const GehrdOpts &opts={})
Reduces a general square matrix to upper Hessenberg form.
Definition gehrd.hpp:222
void transpose(matrixA_t &A, matrixB_t &B, const TransposeOpts &opts={})
transpose a matrix A into a matrix B.
Definition transpose.hpp:92
void laset(uplo_t uplo, const type_t< matrix_t > &alpha, const type_t< matrix_t > &beta, matrix_t &A)
Initializes a matrix to diagonal and off-diagonal values.
Definition laset.hpp:38
int larfb_work(side_t side, trans_t trans, direction_t direction, storage_t storeMode, const matrixV_t &V, const matrixT_t &Tmatrix, matrixC_t &C, work_t &work)
Applies a block reflector or its conjugate transpose to a m-by-n matrix C, from either the left or ...
Definition larfb.hpp:111
int lahr2(size_type< matrix_t > k, size_type< matrix_t > nb, matrix_t &A, vector_t &tau, matrixT_t &T, matrixY_t &Y)
Reduces a general square matrix to upper Hessenberg form.
Definition lahr2.hpp:61
void axpy(const alpha_t &alpha, const vectorX_t &x, vectorY_t &y)
Add scaled vector, .
Definition axpy.hpp:34
void trmm(Side side, Uplo uplo, Op trans, Diag diag, const alpha_t &alpha, const matrixA_t &A, matrixB_t &B)
Triangular matrix-matrix multiply:
Definition trmm.hpp:72
void gemm(Op transA, Op transB, const alpha_t &alpha, const matrixA_t &A, const matrixB_t &B, const beta_t &beta, matrixC_t &C)
General matrix-matrix multiply:
Definition gemm.hpp:61
int gehrd_work(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, work_t &work, const GehrdOpts &opts={})
Reduces a general square matrix to upper Hessenberg form. Workspace is provided as an argument.
Definition gehrd.hpp:99
int gehd2_work(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, work_t &work)
Reduces a general square matrix to upper Hessenberg form. Workspace is provided as an argument.
Definition gehd2.hpp:76
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113
constexpr WorkInfo gehrd_worksize(size_type< matrix_t > ilo, size_type< matrix_t > ihi, const matrix_t &A, const vector_t &tau, const GehrdOpts &opts={})
Worspace query of gehrd()
Definition gehrd.hpp:52
Applies a Householder block reflector to a matrix.
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
Options struct for gehrd.
Definition gehrd.hpp:27
size_t nb
Block size used in the blocked reduction.
Definition gehrd.hpp:28
size_t nx_switch
If only nx_switch columns are left, the algorithm will use unblocked code.
Definition gehrd.hpp:29
Output information in the workspace query.
Definition workspace.hpp:16