<T>LAPACK 0.1.1
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
multishift_qr.hpp
Go to the documentation of this file.
1
5//
6// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
7//
8// This file is part of <T>LAPACK.
9// <T>LAPACK is free software: you can redistribute it and/or modify it under
10// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
11
12#ifndef TLAPACK_MULTISHIFT_QR_HH
13#define TLAPACK_MULTISHIFT_QR_HH
14
19
20namespace tlapack {
21
22template <class T,
23 TLAPACK_SMATRIX matrix_t,
24 TLAPACK_SVECTOR vector_t,
25 enable_if_t<is_complex<type_t<vector_t>>, int> = 0>
26constexpr WorkInfo multishift_qr_worksize_sweep(bool want_t,
27 bool want_z,
28 size_type<matrix_t> ilo,
29 size_type<matrix_t> ihi,
30 const matrix_t& A,
31 const vector_t& w,
32 const matrix_t& Z,
33 const FrancisOpts& opts = {})
34{
35 using idx_t = size_type<matrix_t>;
36 using range = pair<idx_t, idx_t>;
37
38 const idx_t n = ncols(A);
39 const idx_t nh = ihi - ilo;
40 const idx_t nsr = opts.nshift_recommender(n, nh);
41 auto&& shifts = slice(w, range{0, nsr});
42
43 return multishift_QR_sweep_worksize<T>(want_t, want_z, ilo, ihi, A, shifts,
44 Z);
45}
46
68template <class T,
69 TLAPACK_SMATRIX matrix_t,
70 TLAPACK_SVECTOR vector_t,
71 enable_if_t<is_complex<type_t<vector_t>>, int>>
73 bool want_z,
76 const matrix_t& A,
77 const vector_t& w,
78 const matrix_t& Z,
79 const FrancisOpts& opts)
80{
81 using idx_t = size_type<matrix_t>;
82
83 const idx_t n = ncols(A);
84
85 // quick return
87 if (ilo + 1 >= ihi || n < (idx_t)opts.nmin) return workinfo;
88
89 {
90 const idx_t nw_max = (n - 3) / 3;
91
93 want_t, want_z, ilo, ihi, nw_max, A, w, Z, 0, 0, opts);
94 }
95
97 w, Z, opts));
98
99 return workinfo;
100}
101
110template <TLAPACK_SMATRIX matrix_t,
111 TLAPACK_SVECTOR vector_t,
112 TLAPACK_WORKSPACE work_t,
113 enable_if_t<is_complex<type_t<vector_t>>, int>>
115 bool want_z,
118 matrix_t& A,
119 vector_t& w,
120 matrix_t& Z,
121 work_t& work,
123{
124 using TA = type_t<matrix_t>;
125 using real_t = real_type<TA>;
126 using idx_t = size_type<matrix_t>;
128
129 // constants
130 const real_t zero(0);
131 const idx_t non_convergence_limit_window = 5;
132 const idx_t non_convergence_limit_shift = 6;
133 const real_t dat1(0.75);
134 const real_t dat2(-0.4375);
135
136 const idx_t n = ncols(A);
137 const idx_t nh = ihi - ilo;
138
139 // This routine uses the space below the subdiagonal as workspace
140 // For small matrices, this is not enough
141 // if n < nmin, the matrix will be passed to lahqr
142 const idx_t nmin = opts.nmin;
143
144 // Recommended number of shifts
145 const idx_t nsr = opts.nshift_recommender(n, nh);
146
147 // Recommended deflation window size
148 const idx_t nwr = opts.deflation_window_recommender(n, nh);
149 const idx_t nw_max = (n - 3) / 3;
150
151 const idx_t nibble = opts.nibble;
152
153 int n_aed = 0;
154 int n_sweep = 0;
155 int n_shifts_total = 0;
156
157 // check arguments
158 tlapack_check_false(n != nrows(A));
159 tlapack_check_false((idx_t)size(w) != n);
160 if (want_z) {
161 tlapack_check_false((n != ncols(Z)) or (n != nrows(Z)));
162 }
163
164 // quick return
165 if (nh <= 0) return 0;
166 if (nh == 1) w[ilo] = A(ilo, ilo);
167
168 // Tiny matrices must use lahqr
169 if (n < nmin) {
170 return lahqr(want_t, want_z, ilo, ihi, A, w, Z);
171 }
172
173 // itmax is the total number of QR iterations allowed.
174 // For most matrices, 3 shifts per eigenvalue is enough, so
175 // we set itmax to 30 times nh as a safe limit.
176 const idx_t itmax = 30 * std::max<idx_t>(10, nh);
177
178 // k_defl counts the number of iterations since a deflation
179 idx_t k_defl = 0;
180
181 // istop is the end of the active subblock.
182 // As more and more eigenvalues converge, it eventually
183 // becomes ilo+1 and the loop ends.
184 idx_t istop = ihi;
185
186 int info = 0;
187
188 // nw is the deflation window size
189 idx_t nw;
190
191 for (idx_t iter = 0; iter <= itmax; ++iter) {
192 if (iter == itmax) {
193 // The QR algorithm failed to converge, return with error.
194 info = istop;
195 break;
196 }
197
198 if (ilo + 1 >= istop) {
199 if (ilo + 1 == istop) w[ilo] = A(ilo, ilo);
200 // All eigenvalues have been found, exit and return 0.
201 break;
202 }
203
204 // istart is the start of the active subblock. Either
205 // istart = ilo, or H(istart, istart-1) = 0. This means
206 // that we can treat this subblock separately.
207 idx_t istart = ilo;
208
209 // Find active block
210 for (idx_t i = istop - 1; i > ilo; --i) {
211 if (A(i, i - 1) == zero) {
212 istart = i;
213 break;
214 }
215 }
216 //
217 // Agressive early deflation
218 //
219 idx_t nh = istop - istart;
220 idx_t nwupbd = min(nh, nw_max);
222 nw = min(nwupbd, nwr);
223 }
224 else {
225 // There have been no deflations in many iterations
226 // Try to vary the deflation window size.
227 nw = min(nwupbd, 2 * nw);
228 }
229 if (nh <= 4) {
230 // n >= nmin, so there is always enough space for a 4x4 window
231 nw = nh;
232 }
233 if (nw < nw_max) {
234 if (nw + 1 >= nh) nw = nh;
235 idx_t kwtop = istop - nw;
236 if (kwtop > istart + 2)
237 if (abs1(A(kwtop, kwtop - 1)) > abs1(A(kwtop - 1, kwtop - 2)))
238 nw = nw + 1;
239 }
240
241 idx_t ls, ld;
242 n_aed = n_aed + 1;
244 Z, ls, ld, work, opts);
245
246 istop = istop - ld;
247
248 if (ld > 0) k_defl = 0;
249
250 // Skip an expensive QR sweep if there is a (partly heuristic)
251 // reason to expect that many eigenvalues will deflate without it.
252 // Here, the QR sweep is skipped if many eigenvalues have just been
253 // deflated or if the remaining active block is small.
254 if (ld > 0 and
255 (100 * ld > nwr * nibble or istop <= istart + min(nmin, nw_max))) {
256 continue;
257 }
258
259 k_defl = k_defl + 1;
260 idx_t ns = min(nh - 1, min(ls, nsr));
261
262 idx_t i_shifts = istop - ls;
263
265 ns = nsr;
266 for (idx_t i = i_shifts; i < istop - 1; i = i + 2) {
267 real_t ss = abs1(A(i, i - 1));
268 if (i > 1) ss += abs1(A(i - 1, i - 2));
269 TA aa = dat1 * ss + A(i, i);
270 TA bb = ss;
271 TA cc = dat2 * ss;
272 TA dd = aa;
273 lahqr_eig22(aa, bb, cc, dd, w[i], w[i + 1]);
274 }
275 }
276 else {
277 if (ls < nsr / 2) {
278 // Got nsr/2 or fewer shifts? Then use multi/double shift qr to
279 // get more
280 auto temp = slice(A, range{n - nsr, n}, range{0, nsr});
281 auto shifts = slice(w, range{istop - nsr, istop});
282 auto Z_slice = slice(Z, range{0, nsr}, range{0, nsr});
283 int ierr = lahqr(false, false, 0, nsr, temp, shifts, Z_slice);
284
285 ns = nsr - ierr;
286
287 if (ns < 2) {
288 // In case of a rare QR failure, use eigenvalues
289 // of the trailing 2x2 submatrix
290 TA aa = A(istop - 2, istop - 2);
291 TA bb = A(istop - 2, istop - 1);
292 TA cc = A(istop - 1, istop - 2);
293 TA dd = A(istop - 1, istop - 1);
294 lahqr_eig22(aa, bb, cc, dd, w[istop - 2], w[istop - 1]);
295 ns = 2;
296 }
297
298 i_shifts = istop - ns;
299 }
300
301 // Sort the shifts (helps a little)
302 // Bubble sort keeps complex conjugate pairs together
303 bool sorted = false;
304 idx_t k = istop;
305 while (!sorted && k > i_shifts) {
306 sorted = true;
307 for (idx_t i = i_shifts; i < k - 1; ++i) {
308 if (abs1(w[i]) < abs1(w[i + 1])) {
309 sorted = false;
310 const type_t<vector_t> tmp = w[i];
311 w[i] = w[i + 1];
312 w[i + 1] = tmp;
313 }
314 }
315 --k;
316 }
317
318 // Shuffle shifts into pairs of real shifts
319 // and pairs of complex conjugate shifts
320 // assuming complex conjugate shifts are
321 // already adjacent to one another. (Yes,
322 // they are.)
323 for (idx_t i = istop - 1; i > i_shifts + 1; i = i - 2) {
324 if (imag(w[i]) != -imag(w[i - 1])) {
325 const type_t<vector_t> tmp = w[i];
326 w[i] = w[i - 1];
327 w[i - 1] = w[i - 2];
328 w[i - 2] = tmp;
329 }
330 }
331
332 // Since we shuffled the shifts, we will only drop
333 // Real shifts
334 if (ns % 2 == 1) ns = ns - 1;
335 i_shifts = istop - ns;
336 }
337
338 // If there are only two shifts and both are real
339 // then use only one (helps avoid interference)
340 if (is_real<TA>) {
341 if (ns == 2) {
342 if (imag(w[i_shifts]) == zero) {
343 if (abs(real(w[i_shifts]) - A(istop - 1, istop - 1)) <
344 abs(real(w[i_shifts + 1]) - A(istop - 1, istop - 1)))
345 w[i_shifts + 1] = w[i_shifts];
346 else
347 w[i_shifts] = w[i_shifts + 1];
348 }
349 }
350 }
351 auto shifts = slice(w, range{i_shifts, i_shifts + ns});
352
353 n_sweep = n_sweep + 1;
354 n_shifts_total = n_shifts_total + ns;
356 work);
357 }
358
359 opts.n_aed = n_aed;
360 opts.n_shifts_total = n_shifts_total;
361 opts.n_sweep = n_sweep;
362
363 return info;
364}
365
377template <TLAPACK_MATRIX matrix_t,
378 TLAPACK_VECTOR vector_t,
379 TLAPACK_WORKSPACE work_t,
380 enable_if_t<is_complex<type_t<vector_t>>, int> = 0>
381int multishift_qr_work(bool want_t,
382 bool want_z,
383 size_type<matrix_t> ilo,
384 size_type<matrix_t> ihi,
385 matrix_t& A,
386 vector_t& w,
387 matrix_t& Z,
388 work_t& work)
389{
390 FrancisOpts opts = {};
391 return multishift_qr_work(want_t, want_z, ilo, ihi, A, w, Z, work, opts);
392}
393
444template <TLAPACK_SMATRIX matrix_t,
445 TLAPACK_SVECTOR vector_t,
446 enable_if_t<is_complex<type_t<vector_t>>, int> = 0>
448 bool want_z,
451 matrix_t& A,
452 vector_t& w,
453 matrix_t& Z,
455{
456 using TA = type_t<matrix_t>;
457 using idx_t = size_type<matrix_t>;
458
459 // Functor
461
462 const idx_t n = ncols(A);
463 const idx_t nh = ihi - ilo;
464
465 // This routine uses the space below the subdiagonal as workspace
466 // For small matrices, this is not enough
467 // if n < nmin, the matrix will be passed to lahqr
468 const idx_t nmin = opts.nmin;
469
470 // quick return
471 if (nh <= 0) return 0;
472 if (nh == 1) w[ilo] = A(ilo, ilo);
473
474 // Tiny matrices must use lahqr
475 if (n < nmin) {
476 return lahqr(want_t, want_z, ilo, ihi, A, w, Z);
477 }
478
479 // Allocates workspace
482 std::vector<TA> work_;
483 auto work = new_matrix(work_, workinfo.m, workinfo.n);
484
486}
487
498template <TLAPACK_MATRIX matrix_t,
499 TLAPACK_VECTOR vector_t,
500 enable_if_t<is_complex<type_t<vector_t>>, int> = 0>
501int multishift_qr(bool want_t,
502 bool want_z,
503 size_type<matrix_t> ilo,
504 size_type<matrix_t> ihi,
505 matrix_t& A,
506 vector_t& w,
507 matrix_t& Z)
508{
509 FrancisOpts opts = {};
510 return multishift_qr(want_t, want_z, ilo, ihi, A, w, Z, opts);
511}
512
513} // namespace tlapack
514
515#endif // TLAPACK_MULTISHIFT_QR_HH
constexpr real_type< T > real(const T &x) noexcept
Extends std::real() to real datatypes.
Definition utils.hpp:71
constexpr real_type< T > abs1(const T &x)
1-norm absolute value, |Re(x)| + |Im(x)|
Definition utils.hpp:133
constexpr real_type< T > imag(const T &x) noexcept
Extends std::imag() to real datatypes.
Definition utils.hpp:86
#define TLAPACK_SVECTOR
Macro for tlapack::concepts::SliceableVector compatible with C++17.
Definition concepts.hpp:909
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
#define TLAPACK_WORKSPACE
Macro for tlapack::concepts::Workspace compatible with C++17.
Definition concepts.hpp:912
#define TLAPACK_VECTOR
Macro for tlapack::concepts::Vector compatible with C++17.
Definition concepts.hpp:906
#define TLAPACK_MATRIX
Macro for tlapack::concepts::Matrix compatible with C++17.
Definition concepts.hpp:896
int multishift_qr(bool want_t, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &w, matrix_t &Z, FrancisOpts &opts)
multishift_qr computes the eigenvalues and optionally the Schur factorization of an upper Hessenberg ...
Definition multishift_qr.hpp:447
void lahqr_eig22(T a00, T a01, T a10, T a11, complex_type< T > &s1, complex_type< T > &s2)
Computes the eigenvalues of a 2x2 matrix A.
Definition lahqr_eig22.hpp:34
int lahqr(bool want_t, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &w, matrix_t &Z)
lahqr computes the eigenvalues and optionally the Schur factorization of an upper Hessenberg matrix,...
Definition lahqr.hpp:73
void aggressive_early_deflation_work(bool want_t, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, size_type< matrix_t > nw, matrix_t &A, vector_t &s, matrix_t &Z, size_type< matrix_t > &ns, size_type< matrix_t > &nd, work_t &work, FrancisOpts &opts)
aggressive_early_deflation accepts as input an upper Hessenberg matrix H and performs an orthogonal s...
Definition aggressive_early_deflation.hpp:168
int multishift_qr_work(bool want_t, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &w, matrix_t &Z, work_t &work, FrancisOpts &opts)
multishift_qr computes the eigenvalues and optionally the Schur factorization of an upper Hessenberg ...
Definition multishift_qr.hpp:114
void multishift_QR_sweep_work(bool want_t, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, const vector_t &s, matrix_t &Z, work_t &work)
multishift_QR_sweep performs a single small-bulge multi-shift QR sweep. Workspace is provided as an...
Definition multishift_qr_sweep.hpp:77
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113
WorkInfo multishift_qr_worksize(bool want_t, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, const matrix_t &A, const vector_t &w, const matrix_t &Z, const FrancisOpts &opts={})
Worspace query of multishift_qr()
Definition multishift_qr.hpp:72
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
Options struct for multishift_qr().
Definition FrancisOpts.hpp:23
Output information in the workspace query.
Definition workspace.hpp:16