<T>LAPACK 0.1.1
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
lahqr.hpp
Go to the documentation of this file.
1
5//
6// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
7//
8// This file is part of <T>LAPACK.
9// <T>LAPACK is free software: you can redistribute it and/or modify it under
10// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
11
12#ifndef TLAPACK_LAHQR_HH
13#define TLAPACK_LAHQR_HH
14
16#include "tlapack/blas/rot.hpp"
17#include "tlapack/blas/rotg.hpp"
22
23namespace tlapack {
24
69template <TLAPACK_CSMATRIX matrix_t,
70 TLAPACK_VECTOR vector_t,
71 enable_if_t<is_complex<type_t<vector_t>>, bool> = true,
72 enable_if_t<is_real<type_t<matrix_t>>, bool> = true>
73int lahqr(bool want_t,
74 bool want_z,
77 matrix_t& A,
78 vector_t& w,
79 matrix_t& Z)
80{
81 using TA = type_t<matrix_t>;
82 using real_t = real_type<TA>;
83 using idx_t = size_type<matrix_t>;
85
86 // Functor
88
89 // constants
90 const real_t zero(0);
91 const real_t one(1);
92 const real_t eps = ulp<real_t>();
94 const idx_t non_convergence_limit = 10;
95 const real_t dat1(0.75);
96 const real_t dat2(-0.4375);
97
98 const idx_t n = ncols(A);
99 const idx_t nh = ihi - ilo;
100
101 // check arguments
102 tlapack_check_false(n != nrows(A));
103 tlapack_check_false((idx_t)size(w) != n);
104 if (want_z) {
105 tlapack_check_false((n != ncols(Z)) or (n != nrows(Z)));
106 }
107
108 // quick return
109 if (nh <= 0) return 0;
110 if (nh == 1) w[ilo] = A(ilo, ilo);
111
112 // itmax is the total number of QR iterations allowed.
113 // For most matrices, 3 shifts per eigenvalue is enough, so
114 // we set itmax to 30 times nh as a safe limit.
115 const idx_t itmax = 30 * std::max<idx_t>(10, nh);
116
117 // k_defl counts the number of iterations since a deflation
118 idx_t k_defl = 0;
119
120 // istop is the end of the active subblock.
121 // As more and more eigenvalues converge, it eventually
122 // becomes ilo+1 and the loop ends.
123 idx_t istop = ihi;
124 // istart is the start of the active subblock. Either
125 // istart = ilo, or H(istart, istart-1) = 0. This means
126 // that we can treat this subblock separately.
127 idx_t istart = ilo;
128
129 for (idx_t iter = 0; iter <= itmax; ++iter) {
130 if (iter == itmax) {
131 // The QR algorithm failed to converge, return with error.
133 istop,
134 "The QR algorithm failed to compute all the eigenvalues"
135 " in a total of 30 iterations per eigenvalue. Elements"
136 " i:ihi of w contain those eigenvalues which have been"
137 " successfully computed.");
138 return istop;
139 }
140
141 if (istart + 1 >= istop) {
142 if (istart + 1 == istop) w[istart] = A(istart, istart);
143 // All eigenvalues have been found, exit and return 0.
144 break;
145 }
146
147 // Determine range to apply rotations
148 idx_t istart_m;
149 idx_t istop_m;
150 if (!want_t) {
152 istop_m = istop;
153 }
154 else {
155 istart_m = 0;
156 istop_m = n;
157 }
158
159 // Check if active subblock has split
160 for (idx_t i = istop - 1; i > istart; --i) {
161 if (abs1(A(i, i - 1)) <= small_num) {
162 // A(i,i-1) is negligible, take i as new istart.
163 A(i, i - 1) = zero;
164 istart = i;
165 break;
166 }
167
168 real_t tst = abs1(A(i - 1, i - 1)) + abs1(A(i, i));
169 if (tst == zero) {
170 if (i >= ilo + 2) {
171 tst = tst + abs(A(i - 1, i - 2));
172 }
173 if (i < ihi) {
174 tst = tst + abs(A(i + 1, i));
175 }
176 }
177 if (abs1(A(i, i - 1)) <= eps * tst) {
178 //
179 // The elementwise deflation test has passed
180 // The following performs second deflation test due
181 // to Ahues & Tisseur (LAWN 122, 1997). It has better
182 // mathematical foundation and improves accuracy in some
183 // examples.
184 //
185 // The test is |A(i,i-1)|*|A(i-1,i)| <=
186 // eps*|A(i,i)|*|A(i-1,i-1)| The multiplications might overflow
187 // so we do some scaling first.
188 //
189 const real_t aij = abs1(A(i, i - 1));
190 const real_t aji = abs1(A(i - 1, i));
191 real_t ab = (aij > aji) ? aij : aji; // Propagates NaNs in aji
192 real_t ba = (aij < aji) ? aij : aji; // Propagates NaNs in aji
193 real_t aa = max(abs1(A(i, i)), abs1(A(i, i) - A(i - 1, i - 1)));
194 real_t bb = min(abs1(A(i, i)), abs1(A(i, i) - A(i - 1, i - 1)));
195 real_t s = aa + ab;
196 if (ba * (ab / s) <= max(small_num, eps * (bb * (aa / s)))) {
197 // A(i,i-1) is negligible, take i as new istart.
198 A(i, i - 1) = zero;
199 istart = i;
200 break;
201 }
202 }
203 }
204
205 if (istart + 2 >= istop) {
206 if (istart + 1 == istop) {
207 // 1x1 block
208 k_defl = 0;
209 w[istart] = A(istart, istart);
210 istop = istart;
211 istart = ilo;
212 continue;
213 }
214 if constexpr (is_real<TA>) {
215 if (istart + 2 == istop) {
216 // 2x2 block, normalize the block
217 real_t cs;
218 TA sn;
219 TA Aii = A(istart, istart);
220 TA Ajj = A(istart + 1, istart + 1);
221 TA Aij = A(istart, istart + 1);
222 TA Aji = A(istart + 1, istart);
225 lahqr_schur22(Aii, Aij, Aji, Ajj, wi, wj, cs, sn);
226 A(istart, istart) = Aii;
227 A(istart + 1, istart + 1) = Ajj;
228 A(istart, istart + 1) = Aij;
229 A(istart + 1, istart) = Aji;
230 w[istart] = wi;
231 w[istart + 1] = wj;
232 // Apply the rotations from the normalization to the rest of
233 // the matrix.
234 if (want_t) {
235 if (istart + 2 < istop_m) {
236 auto x =
237 slice(A, istart, range{istart + 2, istop_m});
238 auto y = slice(A, istart + 1,
239 range{istart + 2, istop_m});
240 rot(x, y, cs, sn);
241 }
242 auto x2 = slice(A, range{istart_m, istart}, istart);
243 auto y2 = slice(A, range{istart_m, istart}, istart + 1);
244 rot(x2, y2, cs, sn);
245 }
246 if (want_z) {
247 auto x = col(Z, istart);
248 auto y = col(Z, istart + 1);
249 rot(x, y, cs, sn);
250 }
251 k_defl = 0;
252 istop = istart;
253 istart = ilo;
254 continue;
255 }
256 }
257 }
258
259 // Determine shift
260 TA a00, a01, a10, a11;
261 k_defl = k_defl + 1;
262 if (k_defl % non_convergence_limit == 0) {
263 // Exceptional shift
264 real_t s = abs(A(istop - 1, istop - 2));
265 if (istop > ilo + 2) s = s + abs(A(istop - 2, istop - 3));
266 a00 = dat1 * s + A(istop - 1, istop - 1);
267 a01 = dat2 * s;
268 a10 = s;
269 a11 = a00;
270 }
271 else {
272 // Wilkinson shift
273 a00 = A(istop - 2, istop - 2);
274 a10 = A(istop - 1, istop - 2);
275 a01 = A(istop - 2, istop - 1);
276 a11 = A(istop - 1, istop - 1);
277 }
280 lahqr_eig22(a00, a01, a10, a11, s1, s2);
281 if ((imag(s1) == zero and imag(s2) == zero) or is_complex<TA>) {
282 // The eigenvalues are not complex conjugate, keep only the one
283 // closest to A(istop-1, istop-1)
284 if (abs1(s1 - A(istop - 1, istop - 1)) <=
285 abs1(s2 - A(istop - 1, istop - 1)))
286 s2 = s1;
287 else
288 s1 = s2;
289 }
290
291 // We have already checked whether the subblock has split.
292 // If it has split, we can introduce any shift at the top of the new
293 // subblock. Now that we know the specific shift, we can also check
294 // whether we can introduce that shift somewhere else in the subblock.
295 TA v_[3];
296 auto v = new_3_vector(v_);
297 TA t1;
298 idx_t istart2 = istart;
299 if (istart + 3 < istop) {
300 for (idx_t i = istop - 3; i > istart; --i) {
301 auto H = slice(A, range{i, i + 3}, range{i, i + 3});
304 v[0] = t1;
305 const TA refsum =
306 conj(v[0]) * A(i, i - 1) + conj(v[1]) * A(i + 1, i - 1);
307 if (abs1(A(i + 1, i - 1) - refsum * v[1]) +
308 abs1(refsum * v[2]) <=
309 eps * (abs1(A(i, i - 1)) + abs1(A(i, i + 1)) +
310 abs1(A(i + 1, i + 2)))) {
311 istart2 = i;
312 break;
313 }
314 }
315 }
316
317 for (idx_t i = istart2; i < istop - 1; ++i) {
318 const idx_t nr = std::min<idx_t>(3, istop - i);
319 if (i == istart2) {
320 auto H = slice(A, range{i, i + nr}, range{i, i + nr});
321 auto x = slice(v, range{0, nr});
323 auto y = slice(v, range{1, nr});
324 TA alpha = v[0];
326 v[0] = alpha;
327 if (i > istart) {
328 A(i, i - 1) = A(i, i - 1) * (one - conj(t1));
329 }
330 }
331 else {
332 v[0] = A(i, i - 1);
333 v[1] = A(i + 1, i - 1);
334 if (nr == 3) v[2] = A(i + 2, i - 1);
335 auto x = slice(v, range{1, nr});
336 TA alpha = v[0];
338 v[0] = alpha;
339 A(i, i - 1) = v[0];
340 A(i + 1, i - 1) = zero;
341 if (nr == 3) A(i + 2, i - 1) = zero;
342 }
343
344 // The following code applies the reflector we have just calculated.
345 // We write this out instead of using larf because a direct loop is
346 // more efficient for small reflectors.
347
348 t1 = conj(t1);
349 const TA v2 = v[1];
350 const TA t2 = t1 * v2;
351 TA sum;
352 if (nr == 3) {
353 const TA v3 = v[2];
354 const TA t3 = t1 * v[2];
355
356 // Apply G from the left to A
357 for (idx_t j = i; j < istop_m; ++j) {
358 sum = A(i, j) + conj(v2) * A(i + 1, j) +
359 conj(v3) * A(i + 2, j);
360 A(i, j) = A(i, j) - sum * t1;
361 A(i + 1, j) = A(i + 1, j) - sum * t2;
362 A(i + 2, j) = A(i + 2, j) - sum * t3;
363 }
364 // Apply G from the right to A
365 for (idx_t j = istart_m; j < min(i + 4, istop); ++j) {
366 sum = A(j, i) + v2 * A(j, i + 1) + v3 * A(j, i + 2);
367 A(j, i) = A(j, i) - sum * conj(t1);
368 A(j, i + 1) = A(j, i + 1) - sum * conj(t2);
369 A(j, i + 2) = A(j, i + 2) - sum * conj(t3);
370 }
371 if (want_z) {
372 // Apply G to Z from the right
373 for (idx_t j = 0; j < n; ++j) {
374 sum = Z(j, i) + v2 * Z(j, i + 1) + v3 * Z(j, i + 2);
375 Z(j, i) = Z(j, i) - sum * conj(t1);
376 Z(j, i + 1) = Z(j, i + 1) - sum * conj(t2);
377 Z(j, i + 2) = Z(j, i + 2) - sum * conj(t3);
378 }
379 }
380 }
381 else {
382 // Apply G from the left to A
383 for (idx_t j = i; j < istop_m; ++j) {
384 sum = A(i, j) + conj(v2) * A(i + 1, j);
385 A(i, j) = A(i, j) - sum * t1;
386 A(i + 1, j) = A(i + 1, j) - sum * t2;
387 }
388 // Apply G from the right to A
389 for (idx_t j = istart_m; j < min(i + 3, istop); ++j) {
390 sum = A(j, i) + v2 * A(j, i + 1);
391 A(j, i) = A(j, i) - sum * conj(t1);
392 A(j, i + 1) = A(j, i + 1) - sum * conj(t2);
393 }
394 if (want_z) {
395 // Apply G to Z from the right
396 for (idx_t j = 0; j < n; ++j) {
397 sum = Z(j, i) + v2 * Z(j, i + 1);
398 Z(j, i) = Z(j, i) - sum * conj(t1);
399 Z(j, i + 1) = Z(j, i + 1) - sum * conj(t2);
400 }
401 }
402 }
403 }
404 }
405
406 return 0;
407}
408
415template <TLAPACK_CSMATRIX matrix_t,
416 TLAPACK_VECTOR vector_t,
417 enable_if_t<is_complex<type_t<vector_t>>, bool> = true,
418 enable_if_t<is_complex<type_t<matrix_t>>, bool> = true>
419int lahqr(bool want_t,
420 bool want_z,
421 size_type<matrix_t> ilo,
422 size_type<matrix_t> ihi,
423 matrix_t& A,
424 vector_t& w,
425 matrix_t& Z)
426{
427 using TA = type_t<matrix_t>;
428 using real_t = real_type<TA>;
429 using idx_t = size_type<matrix_t>;
430
431 // constants
432 const real_t zero(0);
433 const real_t eps = ulp<real_t>();
434 const real_t small_num = safe_min<real_t>() / ulp<real_t>();
435 const idx_t non_convergence_limit = 10;
436 const real_t dat1(0.75);
437 const real_t dat2(-0.4375);
438
439 const idx_t n = ncols(A);
440 const idx_t nh = ihi - ilo;
441
442 // check arguments
443 tlapack_check_false(n != nrows(A));
444 tlapack_check_false((idx_t)size(w) != n);
445 if (want_z) {
446 tlapack_check_false((n != ncols(Z)) or (n != nrows(Z)));
447 }
448
449 // quick return
450 if (nh <= 0) return 0;
451 if (nh == 1) w[ilo] = A(ilo, ilo);
452
453 // itmax is the total number of QR iterations allowed.
454 // For most matrices, 3 shifts per eigenvalue is enough, so
455 // we set itmax to 30 times nh as a safe limit.
456 const idx_t itmax = 30 * std::max<idx_t>(10, nh);
457
458 // k_defl counts the number of iterations since a deflation
459 idx_t k_defl = 0;
460
461 // istop is the end of the active subblock.
462 // As more and more eigenvalues converge, it eventually
463 // becomes ilo+1 and the loop ends.
464 idx_t istop = ihi;
465 // istart is the start of the active subblock. Either
466 // istart = ilo, or H(istart, istart-1) = 0. This means
467 // that we can treat this subblock separately.
468 idx_t istart = ilo;
469
470 for (idx_t iter = 0; iter <= itmax; ++iter) {
471 if (iter == itmax) {
472 // The QR algorithm failed to converge, return with error.
473 return istop;
474 }
475
476 if (istart + 1 >= istop) {
477 if (istart + 1 == istop) w[istart] = A(istart, istart);
478 // All eigenvalues have been found, exit and return 0.
479 break;
480 }
481
482 // Determine range to apply rotations
483 idx_t istart_m;
484 idx_t istop_m;
485 if (!want_t) {
486 istart_m = istart;
487 istop_m = istop;
488 }
489 else {
490 istart_m = 0;
491 istop_m = n;
492 }
493
494 // Check if active subblock has split
495 for (idx_t i = istop - 1; i > istart; --i) {
496 if (abs1(A(i, i - 1)) <= small_num) {
497 // A(i,i-1) is negligible, take i as new istart.
498 A(i, i - 1) = zero;
499 istart = i;
500 break;
501 }
502
503 real_t tst = abs1(A(i - 1, i - 1)) + abs1(A(i, i));
504 if (tst == zero) {
505 if (i >= ilo + 2) {
506 tst = tst + abs(A(i - 1, i - 2));
507 }
508 if (i < ihi) {
509 tst = tst + abs(A(i + 1, i));
510 }
511 }
512 if (abs1(A(i, i - 1)) <= eps * tst) {
513 //
514 // The elementwise deflation test has passed
515 // The following performs second deflation test due
516 // to Ahues & Tisseur (LAWN 122, 1997). It has better
517 // mathematical foundation and improves accuracy in some
518 // examples.
519 //
520 // The test is |A(i,i-1)|*|A(i-1,i)| <=
521 // eps*|A(i,i)|*|A(i-1,i-1)| The multiplications might overflow
522 // so we do some scaling first.
523 //
524 const real_t aij = abs1(A(i, i - 1));
525 const real_t aji = abs1(A(i - 1, i));
526 real_t ab = (aij > aji) ? aij : aji; // Propagates NaNs in aji
527 real_t ba = (aij < aji) ? aij : aji; // Propagates NaNs in aji
528 real_t aa = max(abs1(A(i, i)), abs1(A(i, i) - A(i - 1, i - 1)));
529 real_t bb = min(abs1(A(i, i)), abs1(A(i, i) - A(i - 1, i - 1)));
530 real_t s = aa + ab;
531 if (ba * (ab / s) <= max(small_num, eps * (bb * (aa / s)))) {
532 // A(i,i-1) is negligible, take i as new istart.
533 A(i, i - 1) = zero;
534 istart = i;
535 break;
536 }
537 }
538 }
539
540 if (istart + 1 >= istop) {
541 k_defl = 0;
542 w[istart] = A(istart, istart);
543 istop = istart;
544 istart = ilo;
545 continue;
546 }
547
548 // Determine shift
549 TA a00, a01, a10, a11;
550 k_defl = k_defl + 1;
551 if (k_defl % non_convergence_limit == 0) {
552 // Exceptional shift
553 real_t s = abs(A(istop - 1, istop - 2));
554 if (istop > ilo + 2) s = s + abs(A(istop - 2, istop - 3));
555 a00 = dat1 * s + A(istop - 1, istop - 1);
556 a01 = dat2 * s;
557 a10 = s;
558 a11 = a00;
559 }
560 else {
561 // Wilkinson shift
562 a00 = A(istop - 2, istop - 2);
563 a10 = A(istop - 1, istop - 2);
564 a01 = A(istop - 2, istop - 1);
565 a11 = A(istop - 1, istop - 1);
566 }
567 complex_type<real_t> s1;
568 complex_type<real_t> s2;
569 lahqr_eig22(a00, a01, a10, a11, s1, s2);
570 if (abs1(s1 - A(istop - 1, istop - 1)) >
571 abs1(s2 - A(istop - 1, istop - 1)))
572 s1 = s2;
573
574 // We have already checked whether the subblock has split.
575 // If it has split, we can introduce any shift at the top of the new
576 // subblock. Now that we know the specific shift, we can also check
577 // whether we can introduce that shift somewhere else in the subblock.
578 TA sn;
579 real_t cs;
580 idx_t istart2 = istart;
581 if (istart + 2 < istop) {
582 for (idx_t i = istop - 2; i > istart; --i) {
583 TA h00 = A(i, i) - s1;
584 TA h10 = A(i + 1, i);
585 rotg(h00, h10, cs, sn);
586
587 if (abs1(conj(sn) * A(i, i - 1)) <=
588 eps * (abs1(A(i, i - 1)) + abs1(A(i, i + 1)))) {
589 istart2 = i;
590 break;
591 }
592 }
593 }
594
595 for (idx_t i = istart2; i < istop - 1; ++i) {
596 if (i == istart2) {
597 TA h00 = A(i, i) - s1;
598 TA h10 = A(i + 1, i);
599 rotg(h00, h10, cs, sn);
600 if (i > istart) A(i, i - 1) = A(i, i - 1) * cs;
601 }
602 else {
603 TA h00 = A(i, i - 1);
604 TA h10 = A(i + 1, i - 1);
605 rotg(h00, h10, cs, sn);
606 A(i, i - 1) = h00;
607 A(i + 1, i - 1) = zero;
608 }
609
610 // Apply G from the left to A
611 for (idx_t j = i; j < istop_m; ++j) {
612 TA tmp = cs * A(i, j) + sn * A(i + 1, j);
613 A(i + 1, j) = -conj(sn) * A(i, j) + cs * A(i + 1, j);
614 A(i, j) = tmp;
615 }
616 // Apply G**H from the right to A
617 for (idx_t j = istart_m; j < min(i + 3, istop); ++j) {
618 TA tmp = cs * A(j, i) + conj(sn) * A(j, i + 1);
619 A(j, i + 1) = -sn * A(j, i) + cs * A(j, i + 1);
620 A(j, i) = tmp;
621 }
622 if (want_z) {
623 // Apply G**H to Z from the right
624 for (idx_t j = 0; j < n; ++j) {
625 TA tmp = cs * Z(j, i) + conj(sn) * Z(j, i + 1);
626 Z(j, i + 1) = -sn * Z(j, i) + cs * Z(j, i + 1);
627 Z(j, i) = tmp;
628 }
629 }
630 }
631 }
632
633 return 0;
634}
635
636} // namespace tlapack
637
638#endif // TLAPACK_LAHQR_HH
constexpr internal::Forward FORWARD
Forward direction.
Definition types.hpp:376
constexpr internal::ColumnwiseStorage COLUMNWISE_STORAGE
Columnwise storage.
Definition types.hpp:409
constexpr T conj(const T &x) noexcept
Extends std::conj() to real datatypes.
Definition utils.hpp:100
constexpr real_type< T > abs1(const T &x)
1-norm absolute value, |Re(x)| + |Im(x)|
Definition utils.hpp:133
constexpr real_type< T > imag(const T &x) noexcept
Extends std::imag() to real datatypes.
Definition utils.hpp:86
#define TLAPACK_CSMATRIX
Macro for tlapack::concepts::ConstructableAndSliceableMatrix compatible with C++17.
Definition concepts.hpp:961
#define TLAPACK_VECTOR
Macro for tlapack::concepts::Vector compatible with C++17.
Definition concepts.hpp:906
void lahqr_eig22(T a00, T a01, T a10, T a11, complex_type< T > &s1, complex_type< T > &s2)
Computes the eigenvalues of a 2x2 matrix A.
Definition lahqr_eig22.hpp:34
int lahqr(bool want_t, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &w, matrix_t &Z)
lahqr computes the eigenvalues and optionally the Schur factorization of an upper Hessenberg matrix,...
Definition lahqr.hpp:73
void larfg(storage_t storeMode, type_t< vector_t > &alpha, vector_t &x, type_t< vector_t > &tau)
Generates a elementary Householder reflection.
Definition larfg.hpp:73
void lahqr_schur22(T &a, T &b, T &c, T &d, complex_type< T > &s1, complex_type< T > &s2, T &cs, T &sn)
Computes the Schur factorization of a 2x2 matrix A.
Definition lahqr_schur22.hpp:44
int lahqr_shiftcolumn(const matrix_t &H, vector_t &v, complex_type< type_t< matrix_t > > s1, complex_type< type_t< matrix_t > > s2)
Given a 2-by-2 or 3-by-3 matrix H, lahqr_shiftcolumn calculates a multiple of the product: (H - s1*I)...
Definition lahqr_shiftcolumn.hpp:41
void rotg(T &a, T &b, T &c, T &s)
Construct plane rotation that eliminates b, such that:
Definition rotg.hpp:39
void rot(vectorX_t &x, vectorY_t &y, const c_type &c, const s_type &s)
Apply plane rotation:
Definition rot.hpp:44
#define tlapack_error(info, detailedInfo)
Error handler.
Definition exceptionHandling.hpp:142
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113