<T>LAPACK 0.1.1
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
gehrd.hpp
Go to the documentation of this file.
1
5//
6// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
7//
8// This file is part of <T>LAPACK.
9// <T>LAPACK is free software: you can redistribute it and/or modify it under
10// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
11
12#ifndef TLAPACK_GEHRD_HH
13#define TLAPACK_GEHRD_HH
14
16#include "tlapack/blas/gemm.hpp"
21
22namespace tlapack {
23
27struct GehrdOpts {
28 size_t nb = 32;
29 size_t nx_switch = 128;
31};
32
51template <class T, TLAPACK_SMATRIX matrix_t, TLAPACK_SVECTOR vector_t>
54 const matrix_t& A,
55 const vector_t& tau,
56 const GehrdOpts& opts = {})
57{
58 using idx_t = size_type<matrix_t>;
59 using work_t = matrix_type<matrix_t, vector_t>;
60 using range = pair<idx_t, idx_t>;
61
62 const idx_t n = ncols(A);
63 const idx_t nb = (ilo < ihi) ? min<idx_t>(opts.nb, ihi - ilo - 1) : 0;
64 const idx_t nx = max<idx_t>(nb, opts.nx_switch);
65
66 WorkInfo workinfo;
67 if constexpr (is_same_v<T, type_t<work_t>>) {
68 if (n > 0) {
69 if ((ilo < ihi) && (nx < ihi - ilo - 1)) {
70 workinfo = WorkInfo(n + nb, nb);
71
72 auto&& V = slice(A, range{ilo + 1, ihi}, range{ilo, ilo + nb});
73 auto&& T_s = slice(A, range{0, nb}, range{0, nb});
74 auto&& A5 = slice(A, range{ilo + 1, ihi}, range{ilo + nb, n});
75
76 workinfo.minMax(larfb_worksize<T>(LEFT_SIDE, CONJ_TRANS,
77 FORWARD, COLUMNWISE_STORAGE,
78 V, T_s, A5)
79 .transpose());
80 }
81 workinfo.minMax(gehd2_worksize<T>(ilo, ihi, A, tau));
82 }
83 }
84
85 return workinfo;
86}
87
96template <TLAPACK_SMATRIX matrix_t,
97 TLAPACK_SVECTOR vector_t,
98 TLAPACK_WORKSPACE work_t>
101 matrix_t& A,
102 vector_t& tau,
103 work_t& work,
104 const GehrdOpts& opts = {})
105{
106 using idx_t = size_type<matrix_t>;
107 using T = type_t<work_t>;
108 using range = pair<idx_t, idx_t>;
109 using TA = type_t<matrix_t>;
110 using real_t = real_type<TA>;
111
112 // constants
113 const real_t one(1);
114 const T zero(0);
115 const idx_t n = ncols(A);
116
117 // Blocksize
118 const idx_t nb = (ilo < ihi) ? min<idx_t>(opts.nb, ihi - ilo - 1) : 0;
119 // Size of the last block which be handled with unblocked code
120 const idx_t nx = max(nb, (idx_t)opts.nx_switch);
121
122 // check arguments
123 tlapack_check_false((ilo < 0) or (ilo >= n));
124 tlapack_check_false((ihi < 0) or (ihi > n));
125 tlapack_check_false(ncols(A) != nrows(A));
126 tlapack_check_false((idx_t)size(tau) < n - 1);
127
128 // quick return
129 if (n <= 0) return 0;
130
131 // Matrices Y and T
132 auto [Y, work2] = ((ilo < ihi) && (nx < ihi - ilo - 1))
133 ? reshape(work, n, nb)
134 : reshape(work, 0, 0);
135 auto Yt = transpose_view(Y);
136 auto [matrixT, work3] = ((ilo < ihi) && (nx < ihi - ilo - 1))
137 ? reshape(work2, nb, nb)
138 : reshape(work2, 0, 0);
139 laset(GENERAL, zero, zero, Y);
140
141 idx_t i = ilo;
142 for (; i + nx < ihi - 1; i = i + nb) {
143 const idx_t nb2 = min(nb, ihi - i - 1);
144
145 auto V = slice(A, range{i + 1, ihi}, range{i, i + nb2});
146 auto A2 = slice(A, range{0, ihi}, range{i, ihi});
147 auto tau2 = slice(tau, range{i, ihi});
148 auto T_s = slice(matrixT, range{0, nb2}, range{0, nb2});
149 auto Y_s = slice(Y, range{0, n}, range{0, nb2});
150 lahr2(i, nb2, A2, tau2, T_s, Y_s);
151 if (i + nb2 < ihi) {
152 // Note, this V2 contains the last row of the triangular part
153 auto V2 = slice(V, range{nb2 - 1, ihi - i - 1}, range{0, nb2});
154
155 // Apply the block reflector H to A(0:ihi,i+nb:ihi) from the right,
156 // computing A := A - Y * V**T. The multiplication requires
157 // V(nb2-1,nb2-1) to be set to 1.
158 const TA ei = V(nb2 - 1, nb2 - 1);
159 V(nb2 - 1, nb2 - 1) = one;
160 auto A3 = slice(A, range{0, ihi}, range{i + nb2, ihi});
161 auto Y_2 = slice(Y, range{0, ihi}, range{0, nb2});
162 gemm(NO_TRANS, CONJ_TRANS, -one, Y_2, V2, one, A3);
163 V(nb2 - 1, nb2 - 1) = ei;
164 }
165 // Apply the block reflector H to A(0:i+1,i+1:i+ib) from the right
166 auto V1 = slice(A, range{i + 1, i + nb2 + 1}, range{i, i + nb2});
167 trmm(RIGHT_SIDE, LOWER_TRIANGLE, CONJ_TRANS, UNIT_DIAG, one, V1, Y_s);
168 for (idx_t j = 0; j < nb2 - 1; ++j) {
169 auto A4 = slice(A, range{0, i + 1}, i + j + 1);
170 axpy(-one, slice(Y, range{0, i + 1}, j), A4);
171 }
172
173 // Apply the block reflector H to A(i+1:ihi,i+nb:n) from the left
174 auto A5 = slice(A, range{i + 1, ihi}, range{i + nb2, n});
175 larfb_work(LEFT_SIDE, CONJ_TRANS, FORWARD, COLUMNWISE_STORAGE, V, T_s,
176 A5, Yt);
177 }
178
179 return gehd2_work(i, ihi, A, tau, work);
180}
181
221template <TLAPACK_SMATRIX matrix_t, TLAPACK_SVECTOR vector_t>
224 matrix_t& A,
225 vector_t& tau,
226 const GehrdOpts& opts = {})
227{
228 using idx_t = size_type<matrix_t>;
229 using work_t = matrix_type<matrix_t, vector_t>;
230 using T = type_t<work_t>;
231
232 // Functor
233 Create<work_t> new_matrix;
234
235 // constants
236 const idx_t n = ncols(A);
237
238 // quick return
239 if (n <= 0) return 0;
240
241 // Allocates workspace
242 WorkInfo workinfo = gehrd_worksize<T>(ilo, ihi, A, tau, opts);
243 std::vector<T> work_;
244 auto work = new_matrix(work_, workinfo.m, workinfo.n);
245
246 return gehrd_work(ilo, ihi, A, tau, work, opts);
247}
248
249} // namespace tlapack
250
251#endif // TLAPACK_GEHRD_HH
#define TLAPACK_SVECTOR
Macro for tlapack::concepts::SliceableVector compatible with C++17.
Definition concepts.hpp:909
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
#define TLAPACK_WORKSPACE
Macro for tlapack::concepts::Workspace compatible with C++17.
Definition concepts.hpp:912
int gehrd(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, const GehrdOpts &opts={})
Reduces a general square matrix to upper Hessenberg form.
Definition gehrd.hpp:222
void transpose(matrixA_t &A, matrixB_t &B, const TransposeOpts &opts={})
transpose a matrix A into a matrix B.
Definition transpose.hpp:92
void laset(uplo_t uplo, const type_t< matrix_t > &alpha, const type_t< matrix_t > &beta, matrix_t &A)
Initializes a matrix to diagonal and off-diagonal values.
Definition laset.hpp:38
int larfb_work(side_t side, trans_t trans, direction_t direction, storage_t storeMode, const matrixV_t &V, const matrixT_t &Tmatrix, matrixC_t &C, work_t &work)
Applies a block reflector or its conjugate transpose to a m-by-n matrix C, from either the left or ...
Definition larfb.hpp:111
int lahr2(size_type< matrix_t > k, size_type< matrix_t > nb, matrix_t &A, vector_t &tau, matrixT_t &T, matrixY_t &Y)
Reduces a general square matrix to upper Hessenberg form.
Definition lahr2.hpp:61
void axpy(const alpha_t &alpha, const vectorX_t &x, vectorY_t &y)
Add scaled vector, .
Definition axpy.hpp:34
void trmm(Side side, Uplo uplo, Op trans, Diag diag, const alpha_t &alpha, const matrixA_t &A, matrixB_t &B)
Triangular matrix-matrix multiply:
Definition trmm.hpp:72
void gemm(Op transA, Op transB, const alpha_t &alpha, const matrixA_t &A, const matrixB_t &B, const beta_t &beta, matrixC_t &C)
General matrix-matrix multiply:
Definition gemm.hpp:61
int gehrd_work(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, work_t &work, const GehrdOpts &opts={})
Reduces a general square matrix to upper Hessenberg form. Workspace is provided as an argument.
Definition gehrd.hpp:99
int gehd2_work(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, work_t &work)
Reduces a general square matrix to upper Hessenberg form. Workspace is provided as an argument.
Definition gehd2.hpp:76
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113
constexpr WorkInfo gehrd_worksize(size_type< matrix_t > ilo, size_type< matrix_t > ihi, const matrix_t &A, const vector_t &tau, const GehrdOpts &opts={})
Worspace query of gehrd()
Definition gehrd.hpp:52
Applies a Householder block reflector to a matrix.
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
Options struct for gehrd.
Definition gehrd.hpp:27
size_t nb
Block size used in the blocked reduction.
Definition gehrd.hpp:28
size_t nx_switch
If only nx_switch columns are left, the algorithm will use unblocked code.
Definition gehrd.hpp:29
Output information in the workspace query.
Definition workspace.hpp:16