<T>LAPACK 0.1.1
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
multishift_qz.hpp
Go to the documentation of this file.
1
5//
6// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
7//
8// This file is part of <T>LAPACK.
9// <T>LAPACK is free software: you can redistribute it and/or modify it under
10// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
11
12#ifndef TLAPACK_MULTISHIFT_QZ_HH
13#define TLAPACK_MULTISHIFT_QZ_HH
14
21
22namespace tlapack {
23
61template <TLAPACK_SMATRIX matrix_t,
62 TLAPACK_SVECTOR alpha_t,
63 TLAPACK_SVECTOR beta_t>
65 bool want_q,
66 bool want_z,
69 matrix_t& A,
70 matrix_t& B,
72 beta_t& beta,
73 matrix_t& Q,
74 matrix_t& Z,
76{
77 using TA = type_t<matrix_t>;
78 using real_t = real_type<TA>;
79 using idx_t = size_type<matrix_t>;
81
82 // constants
83 const real_t zero(0);
84 const idx_t non_convergence_limit_window = 5;
85 const idx_t non_convergence_limit_shift = 6;
86 const real_t dat1(0.75);
87 const real_t dat2(-0.4375);
88
89 const idx_t n = ncols(A);
90 const idx_t nh = ihi - ilo;
91
92 // This routine uses the space below the subdiagonal as workspace
93 // For small matrices, this is not enough
94 // if n < nmin, the matrix will be passed to lahqr
95 const idx_t nmin = opts.nmin;
96
97 // Recommended number of shifts
98 const idx_t nsr = opts.nshift_recommender(n, nh);
99
100 // Recommended deflation window size
101 const idx_t nwr = opts.deflation_window_recommender(n, nh);
102 const idx_t nw_max = (n - 3) / 3;
103
104 const idx_t nibble = opts.nibble;
105
106 const real_t eps = ulp<real_t>();
108
109 int n_aed = 0;
110 int n_sweep = 0;
111 int n_shifts_total = 0;
112
113 // check arguments
114 tlapack_check_false(n != nrows(A));
115 tlapack_check_false(n != ncols(B));
116 tlapack_check_false(n != nrows(B));
117 tlapack_check_false((idx_t)size(alpha) != n);
118 tlapack_check_false((idx_t)size(beta) != n);
119 if (want_z) {
120 tlapack_check_false((n != ncols(Z)) or (n != nrows(Z)));
121 }
122 if (want_q) {
123 tlapack_check_false((n != ncols(Q)) or (n != nrows(Q)));
124 }
125
126 // quick return
127 if (nh <= 0) return 0;
128 if (nh == 1) {
129 alpha[ilo] = A(ilo, ilo);
130 beta[ilo] = B(ilo, ilo);
131 }
132
133 // Tiny matrices must use lahqz
134 if (n < nmin) {
135 return lahqz(want_s, want_q, want_z, ilo, ihi, A, B, alpha, beta, Q, Z);
136 }
137
138 // itmax is the total number of QR iterations allowed.
139 // For most matrices, 3 shifts per eigenvalue is enough, so
140 // we set itmax to 30 times nh as a safe limit.
141 const idx_t itmax = 30 * std::max<idx_t>(10, nh);
142
143 // k_defl counts the number of iterations since a deflation
144 idx_t k_defl = 0;
145
146 // istop is the end of the active subblock.
147 // As more and more eigenvalues converge, it eventually
148 // becomes ilo+1 and the loop ends.
149 idx_t istop = ihi;
150
151 int info = 0;
152
153 // Norm of B, used for checking infinite eigenvalues
154 const real_t bnorm =
155 lange(FROB_NORM, slice(B, range(ilo, ihi), range(ilo, ihi)));
156
157 // nw is the deflation window size
158 idx_t nw;
159
160 for (idx_t iter = 0; iter <= itmax; ++iter) {
161 if (iter == itmax) {
162 // The QZ algorithm failed to converge, return with error.
163 info = istop;
164 break;
165 }
166
167 if (ilo + 1 >= istop) {
168 if (ilo + 1 == istop) {
169 alpha[ilo] = A(ilo, ilo);
170 beta[ilo] = B(ilo, ilo);
171 }
172 // All eigenvalues have been found, exit and return 0.
173 break;
174 }
175
176 // istart is the start of the active subblock. Either
177 // istart = ilo, or H(istart, istart-1) = 0. This means
178 // that we can treat this subblock separately.
179 idx_t istart = ilo;
180
181 // Find active block
182 idx_t istart_m;
183 idx_t istop_m;
184 if (!want_s) {
186 istop_m = istop;
187 }
188 else {
189 istart_m = 0;
190 istop_m = n;
191 }
192
193 // Check if active subblock has split
194 for (idx_t i = istop - 1; i > istart; --i) {
195 if (abs1(A(i, i - 1)) <= small_num) {
196 // A(i,i-1) is negligible, take i as new istart.
197 A(i, i - 1) = zero;
198 istart = i;
199 break;
200 }
201
202 real_t tst = abs1(A(i - 1, i - 1)) + abs1(A(i, i));
203 if (tst == zero) {
204 if (i >= ilo + 2) {
205 tst = tst + abs(A(i - 1, i - 2));
206 }
207 if (i < ihi) {
208 tst = tst + abs(A(i + 1, i));
209 }
210 }
211 if (abs1(A(i, i - 1)) <= eps * tst) {
212 //
213 // The elementwise deflation test has passed
214 // The following performs second deflation test due
215 // to Steel, Vandebril and Langou (2023). It has better
216 // mathematical foundation and improves accuracy in some
217 // examples.
218 //
219 // The test is |A(i,i-1)|*|A(i-1,i)*B(i,i) - A(i,i)*B(i-1,i)| <=
220 // eps*|A(i,i)|*|A(i-1,i-1)*B(i,i)-A(i,i)*B(i-1,i-1)|.
221 // The multiplications might overflow so we do some scaling
222 // first.
223 //
224 const real_t tst1 =
225 abs1(B(i, i) * A(i - 1, i) - A(i, i) * B(i - 1, i));
226 const real_t tst2 =
227 abs1(B(i, i) * A(i - 1, i - 1) - A(i, i) * B(i - 1, i - 1));
228 real_t ab = max(abs1(A(i, i - 1)), tst1);
229 real_t ba = min(abs1(A(i, i - 1)), tst1);
230 real_t aa = max(abs1(A(i, i)), tst2);
231 real_t bb = min(abs1(A(i, i)), tst2);
232 real_t s = aa + ab;
233 if (ba * (ab / s) <= max(small_num, eps * (bb * (aa / s)))) {
234 // A(i,i-1) is negligible, take i as new istart.
235 A(i, i - 1) = zero;
236 istart = i;
237 break;
238 }
239 }
240 }
241
242 // check for infinite eigenvalues
243 for (idx_t i = istart; i < istop; ++i) {
244 if (abs1(B(i, i)) <= max(small_num, eps * bnorm)) {
245 // B(i,i) is negligible, so B is singular, i.e. (A,B) has an
246 // infinite eigenvalue. Move it to the top to be deflated
247 B(i, i) = zero;
248
249 real_t c;
250 TA s;
251 for (idx_t j = i; j > istart; j--) {
252 rotg(B(j - 1, j), B(j - 1, j - 1), c, s);
253 B(j - 1, j - 1) = zero;
254 // Apply rotation from the right
255 {
256 auto b1 = slice(B, range(istart_m, j - 1), j);
257 auto b2 = slice(B, range(istart_m, j - 1), j - 1);
258 rot(b1, b2, c, s);
259
260 auto a1 = slice(A, range(istart_m, min(j + 2, n)), j);
261 auto a2 =
262 slice(A, range(istart_m, min(j + 2, n)), j - 1);
263 rot(a1, a2, c, s);
264
265 if (want_z) {
266 auto z1 = col(Z, j);
267 auto z2 = col(Z, j - 1);
268 rot(z1, z2, c, s);
269 }
270 }
271 // Remove fill-in in A
272 if (j < istop - 1) {
273 rotg(A(j, j - 1), A(j + 1, j - 1), c, s);
274 A(j + 1, j - 1) = zero;
275
276 auto a1 = slice(A, j, range(j, istop_m));
277 auto a2 = slice(A, j + 1, range(j, istop_m));
278 rot(a1, a2, c, s);
279 auto b1 = slice(B, j, range(j + 1, istop_m));
280 auto b2 = slice(B, j + 1, range(j + 1, istop_m));
281 rot(b1, b2, c, s);
282
283 if (want_q) {
284 auto q1 = col(Q, j);
285 auto q2 = col(Q, j + 1);
286 rot(q1, q2, c, conj(s));
287 }
288 }
289 }
290
291 if (istart + 1 < istop) {
292 rotg(A(istart, istart), A(istart + 1, istart), c, s);
293 A(istart + 1, istart) = zero;
294
295 auto a1 = slice(A, istart, range(istart + 1, istop_m));
296 auto a2 = slice(A, istart + 1, range(istart + 1, istop_m));
297 rot(a1, a2, c, s);
298 auto b1 = slice(B, istart, range(istart + 1, istop_m));
299 auto b2 = slice(B, istart + 1, range(istart + 1, istop_m));
300 rot(b1, b2, c, s);
301
302 if (want_q) {
303 auto q1 = col(Q, istart);
304 auto q2 = col(Q, istart + 1);
305 rot(q1, q2, c, conj(s));
306 }
307 }
309 beta[istart] = zero;
310 istart = istart + 1;
311 }
312 }
313
314 if (istart == istop) {
315 istop = istop - 1;
316 istart = ilo;
317 continue;
318 }
319 // Check if 1x1 block has split off
320 if (istart + 1 == istop) {
321 k_defl = 0;
322 // Normalize the block, make sure B(istart, istart) is real and
323 // positive
324 if constexpr (is_real<TA>) {
325 if (B(istart, istart) < 0.) {
326 for (idx_t i = istart_m; i <= istart; ++i) {
327 B(i, istart) = -B(i, istart);
328 A(i, istart) = -A(i, istart);
329 }
330 if (want_z) {
331 for (idx_t i = 0; i < n; ++i) {
332 Z(i, istart) = -Z(i, istart);
333 }
334 }
335 }
336 }
337 else {
338 real_t absB = abs(B(istart, istart));
339 if (absB > small_num and (imag(B(istart, istart)) != zero or
340 real(B(istart, istart)) < zero)) {
341 TA scal = conj(B(istart, istart) / absB);
342 for (idx_t i = istart_m; i <= istart; ++i) {
343 B(i, istart) = scal * B(i, istart);
344 A(i, istart) = scal * A(i, istart);
345 }
346 if (want_z) {
347 for (idx_t i = 0; i < n; ++i) {
348 Z(i, istart) = scal * Z(i, istart);
349 }
350 }
351 B(istart, istart) = absB;
352 }
353 else {
354 B(istart, istart) = zero;
355 }
356 }
359 istop = istart;
360 istart = ilo;
361 continue;
362 }
363 // Check if 2x2 block has split off
364 if constexpr (is_real<TA>) {
365 if (istart + 2 == istop) {
366 // 2x2 block, normalize the block
367 auto A22 = slice(A, range(istart, istop), range(istart, istop));
368 auto B22 = slice(B, range(istart, istop), range(istart, istop));
370 beta[istart], beta[istart + 1]);
371 // Only split off the block if the eigenvalues are imaginary
372 if (imag(alpha[istart]) != zero) {
373 // Standardize, that is, rotate so that
374 // ( B11 0 )
375 // B = ( ) with B11 non-negative
376 // ( 0 B22 )
377 TA ssmin, ssmax, csl, snl, csr, snr;
378 svd22(B22(0, 0), B22(0, 1), B22(1, 1), ssmin, ssmax, csl,
379 snl, csr, snr);
380
381 if (ssmax < (TA)0) {
382 csr = -csr;
383 snr = -snr;
384 ssmin = -ssmin;
385 ssmax = -ssmax;
386 }
387
388 B22(0, 0) = ssmax;
389 B22(1, 1) = ssmin;
390 B22(0, 1) = (TA)0;
391
392 // Apply rotations to A
393 auto a1l = slice(A, istart, range(istart, istop_m));
394 auto a2l = slice(A, istart + 1, range(istart, istop_m));
395 rot(a1l, a2l, csl, snl);
396 auto a1r = slice(A, range(istart_m, istop), istart);
397 auto a2r = slice(A, range(istart_m, istop), istart + 1);
398 rot(a1r, a2r, csr, snr);
399 // Apply rotations to B
400 if (istart + 2 < n) {
401 auto b1l = slice(B, istart, range(istart + 2, istop_m));
402 auto b2l =
403 slice(B, istart + 1, range(istart + 2, istop_m));
404 rot(b1l, b2l, csl, snl);
405 }
406 auto b1r = slice(B, range(istart_m, istart), istart);
407 auto b2r = slice(B, range(istart_m, istart), istart + 1);
408 rot(b1r, b2r, csr, snr);
409
410 // Apply rotation to Q
411 if (want_q) {
412 auto q1 = col(Q, istart);
413 auto q2 = col(Q, istart + 1);
414 rot(q1, q2, csl, snl);
415 }
416
417 // Apply rotation to Z
418 if (want_z) {
419 auto z1 = col(Z, istart);
420 auto z2 = col(Z, istart + 1);
421 rot(z1, z2, csr, snr);
422 }
423
424 k_defl = 0;
425 istop = istart;
426 istart = ilo;
427 continue;
428 }
429 }
430 }
431
432 //
433 // Agressive early deflation
434 //
435 idx_t nh = istop - istart;
436 idx_t nwupbd = min(nh, nw_max);
438 nw = min(nwupbd, nwr);
439 }
440 else {
441 // There have been no deflations in many iterations
442 // Try to vary the deflation window size.
443 nw = min(nwupbd, 2 * nw);
444 }
445 if (nh <= 4) {
446 // n >= nmin, so there is always enough space for a 4x4 window
447 nw = nh;
448 }
449 if (nw < nw_max) {
450 if (nw + 1 >= nh) nw = nh;
451 idx_t kwtop = istop - nw;
452 if (kwtop > istart + 2)
453 if (abs1(A(kwtop, kwtop - 1)) > abs1(A(kwtop - 1, kwtop - 2)))
454 nw = nw + 1;
455 }
456
457 idx_t ls, ld;
458 n_aed = n_aed + 1;
460 istop, nw, A, B, alpha, beta, Q,
461 Z, ls, ld, opts);
462
463 istop = istop - ld;
464
465 if (ld > 0) k_defl = 0;
466
467 // Skip an expensive QR sweep if there is a (partly heuristic)
468 // reason to expect that many eigenvalues will deflate without it.
469 // Here, the QR sweep is skipped if many eigenvalues have just been
470 // deflated or if the remaining active block is small.
471 if (ld > 0 and
472 (100 * ld > nwr * nibble or istop <= istart + min(nmin, nw_max))) {
473 continue;
474 }
475
476 k_defl = k_defl + 1;
477 idx_t ns = min(nh - 1, min(ls, nsr));
478
479 idx_t i_shifts = istop - ls;
480
482 // This exceptional shift closely resembles the QR exceptional shift
483 // This is nice for easy maintenance, but is not guaranteed to work.
484 ns = nsr;
485 for (idx_t i = i_shifts; i < istop - 1; i = i + 2) {
486 real_t ss = abs1(A(i, i - 1));
487 if (i > 1) ss += abs1(A(i - 1, i - 2));
488 TA aa = dat1 * ss + A(i, i);
489 TA bb = ss;
490 TA cc = dat2 * ss;
491 TA dd = aa;
492 lahqr_eig22(aa, bb, cc, dd, alpha[i], alpha[i + 1]);
493 beta[i] = (TA)1;
494 beta[i + 1] = (TA)1;
495 }
496 }
497 else {
498 // Sort the shifts (helps a little)
499 // Bubble sort keeps complex conjugate pairs together
500 bool sorted = false;
501 idx_t k = istop;
502 while (!sorted && k > i_shifts) {
503 sorted = true;
504 for (idx_t i = i_shifts; i < k - 1; ++i) {
505 if (abs1(alpha[i] * beta[i + 1]) <
506 abs1(alpha[i + 1] * beta[i])) {
507 sorted = false;
508 const type_t<alpha_t> tmp = alpha[i];
509 alpha[i] = alpha[i + 1];
510 alpha[i + 1] = tmp;
511 const type_t<beta_t> tmp2 = beta[i];
512 beta[i] = beta[i + 1];
513 beta[i + 1] = tmp2;
514 }
515 }
516 --k;
517 }
518
519 // Shuffle shifts into pairs of real shifts
520 // and pairs of complex conjugate shifts
521 // assuming complex conjugate shifts are
522 // already adjacent to one another. (Yes,
523 // they are.)
524 for (idx_t i = istop - 1; i > i_shifts + 1; i = i - 2) {
525 if (imag(alpha[i]) != -imag(alpha[i - 1])) {
526 const type_t<alpha_t> tmp = alpha[i];
527 alpha[i] = alpha[i - 1];
528 alpha[i - 1] = alpha[i - 2];
529 alpha[i - 2] = tmp;
530 const type_t<beta_t> tmp2 = beta[i];
531 beta[i] = beta[i - 1];
532 beta[i - 1] = beta[i - 2];
533 beta[i - 2] = tmp2;
534 }
535 }
536
537 // Since we shuffled the shifts, we will only drop
538 // Real shifts
539 if (ns % 2 == 1) ns = ns - 1;
540 i_shifts = istop - ns;
541 }
542
543 // If there are only two shifts and both are real
544 // then use only one (helps avoid interference)
545 if (is_real<TA>) {
546 if (ns == 2) {
547 if (imag(alpha[i_shifts]) == zero) {
548 if (abs(real(alpha[i_shifts]) - A(istop - 1, istop - 1)) <
549 abs(real(alpha[i_shifts + 1]) -
550 A(istop - 1, istop - 1))) {
552 beta[i_shifts + 1] = beta[i_shifts];
553 }
554 else {
556 beta[i_shifts] = beta[i_shifts + 1];
557 }
558 }
559 }
560 }
561 auto alpha_shifts = slice(alpha, range{i_shifts, i_shifts + ns});
562 auto beta_shifts = slice(beta, range{i_shifts, i_shifts + ns});
563
564 n_sweep = n_sweep + 1;
565 n_shifts_total = n_shifts_total + ns;
568 }
569
570 opts.n_aed = n_aed;
571 opts.n_shifts_total = n_shifts_total;
572 opts.n_sweep = n_sweep;
573
574 return info;
575}
576
577} // namespace tlapack
578
579#endif // TLAPACK_MULTISHIFT_QZ_HH
constexpr internal::FrobNorm FROB_NORM
Frobenius norm of matrices.
Definition types.hpp:342
constexpr real_type< T > real(const T &x) noexcept
Extends std::real() to real datatypes.
Definition utils.hpp:71
constexpr T conj(const T &x) noexcept
Extends std::conj() to real datatypes.
Definition utils.hpp:100
constexpr real_type< T > abs1(const T &x)
1-norm absolute value, |Re(x)| + |Im(x)|
Definition utils.hpp:133
constexpr real_type< T > imag(const T &x) noexcept
Extends std::imag() to real datatypes.
Definition utils.hpp:86
#define TLAPACK_SVECTOR
Macro for tlapack::concepts::SliceableVector compatible with C++17.
Definition concepts.hpp:909
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
void multishift_QZ_sweep(bool want_t, bool want_q, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, matrix_t &B, const alpha_t &alpha, const beta_t &beta, matrix_t &Q, matrix_t &Z)
multishift_QR_sweep performs a single small-bulge multi-shift QR sweep.
Definition multishift_qz_sweep.hpp:63
auto lange(norm_t normType, const matrix_t &A)
Calculates the norm of a matrix.
Definition lange.hpp:38
int multishift_qz(bool want_s, bool want_q, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, matrix_t &B, alpha_t &alpha, beta_t &beta, matrix_t &Q, matrix_t &Z, FrancisOpts &opts)
multishift_qz computes the eigenvalues of a matrix pair (H,T), where H is an upper Hessenberg matrix ...
Definition multishift_qz.hpp:64
void svd22(const T &f, const T &g, const T &h, T &ssmin, T &ssmax, T &csl, T &snl, T &csr, T &snr)
Computes the singular value decomposition of a 2-by-2 real triangular matrix.
Definition svd22.hpp:55
void lahqr_eig22(T a00, T a01, T a10, T a11, complex_type< T > &s1, complex_type< T > &s2)
Computes the eigenvalues of a 2x2 matrix A.
Definition lahqr_eig22.hpp:34
int lahqz(bool want_s, bool want_q, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, matrix_t &B, alpha_t &alpha, beta_t &beta, matrix_t &Q, matrix_t &Z)
lahqz computes the eigenvalues of a matrix pair (H,T), where H is an upper Hessenberg matrix and T is...
Definition lahqz.hpp:60
void rotg(T &a, T &b, T &c, T &s)
Construct plane rotation that eliminates b, such that:
Definition rotg.hpp:39
void rot(vectorX_t &x, vectorY_t &y, const c_type &c, const s_type &s)
Apply plane rotation:
Definition rot.hpp:44
void scal(const alpha_t &alpha, vector_t &x)
Scale vector by constant, .
Definition scal.hpp:30
void aggressive_early_deflation_generalized(bool want_s, bool want_q, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, size_type< matrix_t > nw, matrix_t &A, matrix_t &B, alpha_t &alpha, beta_t &beta, matrix_t &Q, matrix_t &Z, size_type< matrix_t > &ns, size_type< matrix_t > &nd, FrancisOpts &opts)
aggressive_early_deflation_generalized accepts as input an upper Hessenberg pencil (A,...
Definition aggressive_early_deflation_generalized.hpp:100
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113
void lahqz_eig22(const A_t &A, const B_t &B, complex_type< T > &alpha1, complex_type< T > &alpha2, T &beta1, T &beta2)
Computes the generalized eigenvalues of a 2x2 pencil (A,B) with B upper triangular.
Definition lahqz_eig22.hpp:35
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
Options struct for multishift_qr().
Definition FrancisOpts.hpp:23