<T>LAPACK 0.1.1
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
ungq.hpp
Go to the documentation of this file.
1
6//
7// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
8//
9// This file is part of <T>LAPACK.
10// <T>LAPACK is free software: you can redistribute it and/or modify it under
11// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
12
13#ifndef TLAPACK_UNGQ_HH
14#define TLAPACK_UNGQ_HH
15
20
21namespace tlapack {
22
26struct UngqOpts {
27 size_t nb = 32;
28};
29
54template <class T,
61 const matrix_t& A,
62 const vector_t& tau,
63 const UngqOpts& opts = {})
64{
65 using idx_t = size_type<matrix_t>;
66 using work_t = matrix_type<matrix_t, vector_t>;
67 using range = pair<idx_t, idx_t>;
68
69 // Constants
70 const idx_t m = nrows(A);
71 const idx_t n = ncols(A);
72 const idx_t k = size(tau);
73 const idx_t nb = min((idx_t)opts.nb, k);
74
75 WorkInfo workinfo;
76 auto&& V = (storeMode == StoreV::Columnwise)
77 ? slice(A, range{0, m}, range{0, nb})
78 : slice(A, range{0, nb}, range{0, n});
79
80 if (storeMode == StoreV::Columnwise) {
81 // larfb:
82 if (nb < n) {
83 // Empty matrices
84 auto&& matrixT = slice(A, range{0, nb}, range{0, nb});
85 auto&& C = slice(A, range{0, m},
86 (direction == Direction::Forward)
87 ? range{nb, n}
88 : range{0, (n - k) + ((k - 1) / nb) * nb});
89
90 // Internal workspace queries
91 workinfo = larfb_worksize<T>(LEFT_SIDE, NO_TRANS, direction,
92 COLUMNWISE_STORAGE, V, matrixT, C);
93
94 // Local workspace sizes
95 if (is_same_v<T, type_t<work_t>>) workinfo += WorkInfo(nb, nb);
96 }
97 }
98 else {
99 // larfb:
100 if (nb < m) {
101 // Empty matrices
102 auto&& matrixT = slice(A, range{0, nb}, range{0, nb});
103 auto&& C = slice(A,
104 (direction == Direction::Forward)
105 ? range{nb, m}
106 : range{0, (m - k) + ((k - 1) / nb) * nb},
107 range{0, n});
108
109 // Internal workspace queries
110 workinfo = larfb_worksize<T>(RIGHT_SIDE, CONJ_TRANS, direction,
111 ROWWISE_STORAGE, V, matrixT, C);
112
113 // Local workspace sizes
114 if (is_same_v<T, type_t<work_t>>) workinfo += WorkInfo(nb, nb);
115 }
116 }
117
118 // ungq_level2:
119 auto&& taui = slice(tau, range{0, nb});
120 workinfo.minMax(ungq_level2_worksize<T>(direction, storeMode, V, taui));
121
122 return workinfo;
123}
124
133template <TLAPACK_SMATRIX matrix_t,
134 TLAPACK_SVECTOR vector_t,
135 TLAPACK_DIRECTION direction_t,
136 TLAPACK_STOREV storage_t,
137 TLAPACK_WORKSPACE work_t>
138int ungq_work(direction_t direction,
140 matrix_t& A,
141 const vector_t& tau,
142 work_t& work,
143 const UngqOpts& opts = {})
144{
145 using T = type_t<matrix_t>;
146 using real_t = real_type<T>;
147 using idx_t = size_type<matrix_t>;
148 using range = pair<idx_t, idx_t>;
149
150 // constants
151 const real_t zero(0);
152 const real_t one(1);
153 const idx_t m = nrows(A);
154 const idx_t n = ncols(A);
155 const idx_t k = size(tau);
156 const idx_t nb = min((idx_t)opts.nb, k);
157
158 // check arguments
159 tlapack_check_false(direction != Direction::Backward &&
160 direction != Direction::Forward);
161 tlapack_check((storeMode == StoreV::Columnwise) ? (m >= n && n >= k)
162 : (n >= m && m >= k));
163
164 // quick return
165 if (m <= 0 || n <= 0) return 0;
166
167 // Matrix matrixT
168 auto [matrixT, work1] =
169 (min(m, n) > nb) ? reshape(work, nb, nb) : reshape(work, 0, 0);
170
171 if (storeMode == StoreV::Columnwise) {
172 if (direction == Direction::Forward) {
173 // Initialize unit matrix
174 auto A0 = slice(A, range{0, k}, range{k, n});
175 auto A1 = slice(A, range{k, m}, range{k, n});
176 laset(GENERAL, zero, zero, A0);
177 laset(GENERAL, zero, one, A1);
178
179 for (idx_t j = ((k - 1) / nb) * nb; j != -nb; j -= nb) {
180 const idx_t ib = min(nb, k - j);
181 const auto tauj = slice(tau, range{j, j + ib});
182
183 auto V = slice(A, range{j, m}, range{j, j + ib});
184 if (j + ib < n) {
185 // Apply block reflector to A( j:m, j+ib:n )$ from the left
186
187 auto matrixTj = slice(matrixT, range{0, ib}, range{0, ib});
188 auto C = slice(A, range{j, m}, range{j + ib, n});
189
190 larft(FORWARD, COLUMNWISE_STORAGE, V, tauj, matrixTj);
191 larfb_work(LEFT_SIDE, NO_TRANS, FORWARD, COLUMNWISE_STORAGE,
192 V, matrixTj, C, work1);
193 }
194
195 // Apply block reflector to A( 0:m, j:j+ib )$ from the left
196 // using unblocked code.
197 auto A0 = slice(A, range{0, j}, range{j, j + ib});
198 laset(GENERAL, zero, zero, A0);
199 ungq_level2_work(FORWARD, COLUMNWISE_STORAGE, V, tauj, work);
200 }
201 }
202 else {
203 // Initialize unit matrix
204 auto A0 = slice(A, range{0, m - n}, range{0, n - k});
205 auto A1 = slice(A, range{m - n, m - k}, range{0, n - k});
206 auto A2 = slice(A, range{m - k, m}, range{0, n - k});
207 laset(GENERAL, zero, zero, A0);
208 laset(GENERAL, zero, one, A1);
209 laset(GENERAL, zero, zero, A2);
210
211 for (idx_t j = 0; j < k; j += nb) {
212 const idx_t ib = min(nb, k - j);
213 const idx_t sizev = m - k + j + ib;
214 const idx_t jj = n - k + j;
215 const auto tauj = slice(tau, range{j, j + ib});
216
217 auto V = slice(A, range{0, sizev}, range{jj, jj + ib});
218 if (jj > 0) {
219 // Apply block reflector to A( 0:sizev, 0:jj )$ from the
220 // left
221
222 auto matrixTj = slice(matrixT, range{0, ib}, range{0, ib});
223 auto C = slice(A, range{0, sizev}, range{0, jj});
224
225 larft(BACKWARD, COLUMNWISE_STORAGE, V, tauj, matrixTj);
226 larfb_work(LEFT_SIDE, NO_TRANS, BACKWARD,
227 COLUMNWISE_STORAGE, V, matrixTj, C, work1);
228 }
229
230 // Apply block reflector to A( 0:m, jj:jj+ib )$ from the left
231 // using unblocked code.
232 auto A0 = slice(A, range{sizev, m}, range{jj, jj + ib});
233 ungq_level2_work(BACKWARD, COLUMNWISE_STORAGE, V, tauj, work);
234 laset(GENERAL, zero, zero, A0);
235 }
236 }
237 }
238 else {
239 if (direction == Direction::Forward) {
240 // Initialize unit matrix
241 auto A0 = slice(A, range{k, m}, range{0, k});
242 auto A1 = slice(A, range{k, m}, range{k, n});
243 laset(GENERAL, zero, zero, A0);
244 laset(GENERAL, zero, one, A1);
245
246 for (idx_t i = ((k - 1) / nb) * nb; i != -nb; i -= nb) {
247 const idx_t ib = min(nb, k - i);
248 const auto taui = slice(tau, range{i, i + ib});
249
250 auto V = slice(A, range{i, i + ib}, range{i, n});
251 if (i + ib < m) {
252 // Apply block reflector to A( i+ib:m, i:n )$ from the right
253
254 auto matrixTi = slice(matrixT, range{0, ib}, range{0, ib});
255 auto C = slice(A, range{i + ib, m}, range{i, n});
256
257 larft(FORWARD, ROWWISE_STORAGE, V, taui, matrixTi);
258 larfb_work(RIGHT_SIDE, CONJ_TRANS, FORWARD, ROWWISE_STORAGE,
259 V, matrixTi, C, work1);
260 }
261
262 // Apply block reflector to A( i:i+ib, 0:n )$ from the right
263 // using unblocked code.
264 auto A0 = slice(A, range{i, i + ib}, range{0, i});
265 laset(GENERAL, zero, zero, A0);
266 ungq_level2_work(FORWARD, ROWWISE_STORAGE, V, taui, work);
267 }
268 }
269 else {
270 // Initialize unit matrix
271 auto A0 = slice(A, range{0, m - k}, range{0, n - m});
272 auto A1 = slice(A, range{0, m - k}, range{n - m, n - k});
273 auto A2 = slice(A, range{0, m - k}, range{n - k, n});
274 laset(GENERAL, zero, zero, A0);
275 laset(GENERAL, zero, one, A1);
276 laset(GENERAL, zero, zero, A2);
277
278 for (idx_t i = 0; i < k; i += nb) {
279 const idx_t ib = min(nb, k - i);
280 const idx_t sizev = n - k + i + ib;
281 const idx_t ii = m - k + i;
282 const auto taui = slice(tau, range{i, i + ib});
283
284 auto V = slice(A, range{ii, ii + ib}, range{0, sizev});
285 if (ii > 0) {
286 // Apply block reflector to A( 0:ii, 0:sizev )$ from the
287 // left
288
289 auto matrixTi = slice(matrixT, range{0, ib}, range{0, ib});
290 auto C = slice(A, range{0, ii}, range{0, sizev});
291
292 larft(BACKWARD, ROWWISE_STORAGE, V, taui, matrixTi);
293 larfb_work(RIGHT_SIDE, CONJ_TRANS, BACKWARD,
294 ROWWISE_STORAGE, V, matrixTi, C, work1);
295 }
296
297 // Apply block reflector to A( ii:ii+ib, 0:n )$ from the left
298 // using unblocked code.
299 auto A0 = slice(A, range{ii, ii + ib}, range{sizev, n});
300 ungq_level2_work(BACKWARD, ROWWISE_STORAGE, V, taui, work);
301 laset(GENERAL, zero, zero, A0);
302 }
303 }
304 }
305
306 return 0;
307}
308
348template <TLAPACK_SMATRIX matrix_t,
349 TLAPACK_SVECTOR vector_t,
350 TLAPACK_DIRECTION direction_t,
351 TLAPACK_STOREV storage_t>
352int ungq(direction_t direction,
354 matrix_t& A,
355 const vector_t& tau,
356 const UngqOpts& opts = {})
357{
358 using T = type_t<matrix_t>;
359 using idx_t = size_type<matrix_t>;
360 using work_t = matrix_type<matrix_t, vector_t>;
361
362 // Functor
363 Create<work_t> new_matrix;
364
365 // constants
366 const idx_t m = nrows(A);
367 const idx_t n = ncols(A);
368
369 // quick return
370 if (m <= 0 || n <= 0) return 0;
371
372 // Allocates workspace
373 WorkInfo workinfo = ungq_worksize<T>(direction, storeMode, A, tau, opts);
374 std::vector<T> work_;
375 auto work = new_matrix(work_, workinfo.m, workinfo.n);
376
377 return ungq_work(direction, storeMode, A, tau, work, opts);
378}
379
380} // namespace tlapack
381
382#endif // TLAPACK_UNGQ_HH
#define TLAPACK_SVECTOR
Macro for tlapack::concepts::SliceableVector compatible with C++17.
Definition concepts.hpp:909
#define TLAPACK_STOREV
Macro for tlapack::concepts::StoreV compatible with C++17.
Definition concepts.hpp:936
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
#define TLAPACK_DIRECTION
Macro for tlapack::concepts::Direction compatible with C++17.
Definition concepts.hpp:930
#define TLAPACK_WORKSPACE
Macro for tlapack::concepts::Workspace compatible with C++17.
Definition concepts.hpp:912
int ungq(direction_t direction, storage_t storeMode, matrix_t &A, const vector_t &tau, const UngqOpts &opts={})
Generates a matrix Q that is the product of elementary reflectors.
Definition ungq.hpp:352
void laset(uplo_t uplo, const type_t< matrix_t > &alpha, const type_t< matrix_t > &beta, matrix_t &A)
Initializes a matrix to diagonal and off-diagonal values.
Definition laset.hpp:38
int larfb_work(side_t side, trans_t trans, direction_t direction, storage_t storeMode, const matrixV_t &V, const matrixT_t &Tmatrix, matrixC_t &C, work_t &work)
Applies a block reflector or its conjugate transpose to a m-by-n matrix C, from either the left or ...
Definition larfb.hpp:111
int larft(direction_t direction, storage_t storeMode, const matrixV_t &V, const vector_t &tau, matrixT_t &T)
Forms the triangular factor T of a block reflector H of order n, which is defined as a product of k e...
Definition larft.hpp:92
int ungq_work(direction_t direction, storage_t storeMode, matrix_t &A, const vector_t &tau, work_t &work, const UngqOpts &opts={})
Generates a matrix Q that is the product of elementary reflectors. Workspace is provided as an argu...
Definition ungq.hpp:138
int ungq_level2_work(direction_t direction, storage_t storeMode, matrix_t &A, const vector_t &tau, work_t &work)
Generates a matrix Q that is the product of elementary reflectors. Workspace is provided as an argu...
Definition ungq_level2.hpp:90
#define tlapack_check(cond)
Throw an error if cond is false.
Definition exceptionHandling.hpp:98
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113
constexpr WorkInfo ungq_worksize(direction_t direction, storage_t storeMode, const matrix_t &A, const vector_t &tau, const UngqOpts &opts={})
Worspace query of ungq()
Definition ungq.hpp:59
Applies a Householder block reflector to a matrix.
Forms the triangular factor T of a block reflector.
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
Options struct for ungq.
Definition ungq.hpp:26
size_t nb
Block size.
Definition ungq.hpp:27
Output information in the workspace query.
Definition workspace.hpp:16