<T>LAPACK 0.1.2
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
trevc_backsolve.hpp
Go to the documentation of this file.
1
3// based on A. Schwarz et al., "Scalable eigenvector computation for the
4// non-symmetric eigenvalue problem"
5//
6// Copyright (c) 2025, University of Colorado Denver. All rights reserved.
7//
8// This file is part of <T>LAPACK.
9// <T>LAPACK is free software: you can redistribute it and/or modify it under
10// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
11
12#ifndef TLAPACK_TREVC_BACKSOLVE_HH
13#define TLAPACK_TREVC_BACKSOLVE_HH
14
16#include "tlapack/blas/asum.hpp"
20namespace tlapack {
21
82 const vector_colN_t& colN)
83{
84 using idx_t = size_type<matrix_T_t>;
85 using TT = type_t<matrix_T_t>;
86 using real_t = real_type<TT>;
88
89 const idx_t n = nrows(T);
90
91 tlapack_check(ncols(T) == n);
92 tlapack_check(size(v) == n);
93 tlapack_check(k < n);
94
97
98 // Initialize v to [-T(0:k-1, k), 1, 0]
99 for (idx_t i = 0; i < k; ++i) {
100 v[i] = -T(i, k);
101 }
102 v[k] = TT(1);
103 for (idx_t i = k + 1; i < n; ++i) {
104 v[i] = TT(0);
105 }
106 real_t scale = real_t(1);
107
108 TT w = T(k, k); // eigenvalue
109
110 // Backsubstitution to solve the system
111 auto T11 = slice(T, range(0, k), range(0, k));
112 auto v1 = slice(v, range(0, k));
113
114 // The matrix is real, so we need to consider potential
115 // 2x2 blocks for complex conjugate eigenvalue pairs
116
117 idx_t ii = 0;
118 while (ii < k) {
119 idx_t i = k - 1 - ii;
120 bool is_2x2_block = false;
121 if (i > 0) {
122 if (T11(i, i - 1) != TT(0)) {
123 is_2x2_block = true;
124 }
125 }
126
127 if (is_2x2_block) {
128 // 2x2 block
129 // Solve the 2x2 system:
130 // [T11(i-1,i-1)-w T11(i-1,i) ] [v1[i-1]] = scale1 * [rhs1]
131 // [T11(i, i-1) T11(i, i)-w ] [v1[i] ] [rhs2]
132 TT a = T11(i - 1, i - 1) - w;
133 TT b = T11(i - 1, i);
134 TT c = T11(i, i - 1);
135 TT d = T11(i, i) - w;
136
138 trevc_2x2solve(a, b, c, d, v1[i - 1], v1[i], scale1, sf_min,
139 sf_max);
140
141 // Apply scale1 to all of v1
142 if (scale1 != real_t(1)) {
143 scale *= scale1;
144 for (idx_t j = 0; j + 1 < i; ++j) {
145 v1[j] = scale1 * v1[j];
146 }
147 for (idx_t j = i + 1; j < k; ++j) {
148 v1[j] = scale1 * v1[j];
149 }
150 }
151
152 // Safely update the right-hand side
153 if (i > 1) {
154 real_t ivmax = iamax(slice(v1, range(0, i - 1)));
155 real_t vmax = abs1(v1[ivmax]);
156
157 real_t tmax = colN[i - 1];
158
159 real_t xnorm = max(abs1(v1[i]), abs1(v1[i - 1]));
160
162 if (scale2 != real_t(1)) {
163 // Apply update with scaling
164 for (idx_t j = 0; j + 1 < i; ++j) {
165 v1[j] = (scale2 * v1[j]) -
166 T11(j, i - 1) * (scale2 * v1[i - 1]) -
167 T11(j, i) * (scale2 * v1[i]);
168 }
169
170 // Apply scale2 to all of v1
171 for (idx_t j = i - 1; j < k; ++j) {
172 v1[j] = scale2 * v1[j];
173 }
174
175 scale *= scale2;
176 }
177 else {
178 for (idx_t j = 0; j + 1 < i; ++j) {
179 v1[j] -= T11(j, i - 1) * v1[i - 1];
180 v1[j] -= T11(j, i) * v1[i];
181 }
182 }
183 }
184
185 ii += 2;
186 }
187 else {
188 // 1x1 block
189
190 // In the paper, they scale here so that denom cannot overflow
191 // but this only happens if T11(i,i) and w are both very large
192 // not just if the matrix is ill-conditioned.
193 // The equations to take the scaling into account also don't
194 // seem fully correct.
195 TT denom = T11(i, i) - w;
196 // Scale factor so that we can safely calculate v1[i] / (T11(i, i) -
197 // w)
199
200 // Safely execute the division
201 v1[i] = (scale1 * v1[i]) / denom;
202
203 if (scale1 != real_t(1)) {
204 scale *= scale1;
205 // Apply scale1 to all of v1
206 for (idx_t j = 0; j < i; ++j) {
207 v1[j] = scale1 * v1[j];
208 }
209 for (idx_t j = i + 1; j < k; ++j) {
210 v1[j] = scale1 * v1[j];
211 }
212 }
213
214 // Now update v1[0:i-1] -= T11(0:i-1, i) * v1[i]
215 if (i > 0) {
216 real_t ivmax = iamax(slice(v1, range(0, i)));
217 real_t vmax = abs1(v1[ivmax]);
218
219 real_t tmax = colN[i]; // use precomputed column norms
220
221 real_t xnorm = abs1(v1[i]);
222
223 // Scale factor so that
224 // (scale2*v1[0:i-1]) - T11(0:i-1, i) * (scale2 * v1[i]) does
225 // not overflow
227 if (scale2 != real_t(1)) {
228 for (idx_t j = 0; j < i; ++j) {
229 v1[j] = (scale2 * v1[j]) - T11(j, i) * (scale2 * v1[i]);
230 }
231 // Apply scale2 to all of v1
232 for (idx_t j = i; j < k; ++j) {
233 v1[j] = scale2 * v1[j];
234 }
235
236 scale *= scale2;
237 }
238 else {
239 for (idx_t j = 0; j < i; ++j) {
240 v1[j] = (v1[j]) - T11(j, i) * (v1[i]);
241 }
242 }
243 }
244
245 ii += 1;
246 }
247 }
248
249 v[k] = scale * v[k];
250}
251
255template <TLAPACK_MATRIX matrix_T_t,
261 vector_v_t& v,
263 const vector_colN_t& colN)
264{
265 using idx_t = size_type<matrix_T_t>;
266 using TT = type_t<matrix_T_t>;
267 using real_t = real_type<TT>;
269
270 const idx_t n = nrows(T);
271
272 tlapack_check(ncols(T) == n);
273 tlapack_check(size(v) == n);
274 tlapack_check(k < n);
275
278
279 // Initialize v to [-T(0:k-1, k), 1, 0]
280 for (idx_t i = 0; i < k; ++i) {
281 v[i] = -T(i, k);
282 }
283 v[k] = TT(1);
284 for (idx_t i = k + 1; i < n; ++i) {
285 v[i] = TT(0);
286 }
287 real_t scale = real_t(1);
288
289 TT w = T(k, k); // eigenvalue
290
291 // Backsubstitution to solve the system
292 auto T11 = slice(T, range(0, k), range(0, k));
293 auto v1 = slice(v, range(0, k));
294
295 for (idx_t ii = 0; ii < k; ++ii) {
296 idx_t i = k - 1 - ii;
297 TT denom = T11(i, i) - w;
298 // Scale factor so that we can safely calculate v1[i] / (T11(i, i) - w)
300
301 // Safely execute the division
302 v1[i] = ladiv(scale1 * v1[i], denom);
303
304 if (scale1 != real_t(1)) {
305 scale *= scale1;
306 // Apply scale1 to all of v1
307 for (idx_t j = 0; j < i; ++j) {
308 v1[j] = scale1 * v1[j];
309 }
310 for (idx_t j = i + 1; j < k; ++j) {
311 v1[j] = scale1 * v1[j];
312 }
313 }
314
315 // Now update v1[0:i-1] -= T11(0:i-1, i) * v1[i]
316 if (i > 0) {
317 real_t ivmax = iamax(slice(v1, range(0, i)));
318 real_t vmax = abs1(v1[ivmax]);
319
320 real_t tmax = colN[i]; // use precomputed column norms
321
322 real_t xnorm = abs1(v1[i]);
323
324 // Scale factor so that
325 // (scale2*v1[0:i-1]) - T11(0:i-1, i) * (scale2 * v1[i]) does not
326 // overflow
328 if (scale2 != real_t(1)) {
329 for (idx_t j = 0; j < i; ++j) {
330 v1[j] = (scale2 * v1[j]) - T11(j, i) * (scale2 * v1[i]);
331 }
332 // Apply scale2 to all of v1
333 for (idx_t j = i; j < k; ++j) {
334 v1[j] = scale2 * v1[j];
335 }
336
337 scale *= scale2;
338 }
339 else {
340 for (idx_t j = 0; j < i; ++j) {
341 v1[j] = (v1[j]) - T11(j, i) * (v1[i]);
342 }
343 }
344 }
345 }
346
347 v[k] = scale * v[k];
348}
349
410template <TLAPACK_MATRIX matrix_T_t,
419 const vector_colN_t& colN)
420{
421 using idx_t = size_type<matrix_T_t>;
422 using TT = type_t<matrix_T_t>;
424
425 const idx_t n = nrows(T);
426
427 tlapack_check(ncols(T) == n);
428 tlapack_check(size(v_r) == n);
429 tlapack_check(size(v_i) == n);
430 tlapack_check(k + 1 < n);
431
432 const TT sf_max = safe_max<TT>();
433 const TT sf_min = safe_min<TT>();
434
435 TT alpha = T(k, k);
436 TT beta = T(k, k + 1);
437 TT gamma = T(k + 1, k);
438
439 // real part of eigenvalue
440 TT wr = alpha;
441 // imaginary part of eigenvalue
442 TT wi = sqrt(abs(beta)) * sqrt(abs(gamma));
443
444 // Depending of whether beta or gamma is bigger, we set x2 = 1 or x3 = i
445 TT x2, x3;
446 if (abs(beta) >= abs(gamma)) {
447 x2 = TT(1);
448 x3 = wi / beta;
449 }
450 else {
451 x2 = -wi / gamma;
452 x3 = TT(1);
453 }
454
455 // Initialize v_real and v_imag to -T(0:k-1, k:k+1) * [x2; i * x3]
456 for (idx_t i = 0; i < k; ++i) {
457 v_r[i] = -T(i, k) * x2;
458 v_i[i] = -T(i, k + 1) * x3;
459 }
460 v_r[k] = x2;
461 v_i[k] = TT(0);
462 v_r[k + 1] = TT(0);
463 v_i[k + 1] = x3;
464 for (idx_t i = k + 2; i < n; ++i) {
465 v_r[i] = TT(0);
466 v_i[i] = TT(0);
467 }
468
469 TT scale = TT(1);
470
471 // Now do a complex backsustitution using the shift wr + i*wi
472 // but without forming complex numbers explicitly
473 // on top of that, we need to take care of potential 2x2 blocks in T11
474 auto T11 = slice(T, range(0, k), range(0, k));
475 auto v1_r = slice(v_r, range(0, k));
476 auto v1_i = slice(v_i, range(0, k));
477
478 idx_t ii = 0;
479 while (ii < k) {
480 idx_t i = k - 1 - ii;
481 bool is_2x2_block = false;
482 if (i > 0) {
483 if (T11(i, i - 1) != TT(0)) {
484 is_2x2_block = true;
485 }
486 }
487
488 if (is_2x2_block) {
489 // 2x2 block
490
491 // Solve the complex 2x2 system:
492 // [T11(i-1,i-1)- (wr + i*wi) T11(i-1,i) ]
493 // [T11(i, i-1) T11(i, i)- (wr + i*wi)]
494 // *
495 // x
496 // =
497 // [v1_r[i-1] + i*v1_i[i-1]]
498 // [v1_r[i] + i*v1_i[i] ]
499
500 TT a11r = T11(i - 1, i - 1) - wr;
501 TT a11i = -wi;
502 TT a12 = T11(i - 1, i);
503 TT a21 = T11(i, i - 1);
504 TT a22r = T11(i, i) - wr;
505 TT a22i = -wi;
506
507 TT b1r = v1_r[i - 1];
508 TT b1i = v1_i[i - 1];
509 TT b2r = v1_r[i];
510 TT b2i = v1_i[i];
511
512 TT scale1;
513 trevc_2x2solve(a11r, a11i, a12, TT(0), a21, TT(0), a22r, a22i, b1r,
515
516 v1_r[i - 1] = b1r;
517 v1_i[i - 1] = b1i;
518 v1_r[i] = b2r;
519 v1_i[i] = b2i;
520
521 if (scale1 != TT(1)) {
522 scale *= scale1;
523 // Apply scale1 to all of v1
524 for (idx_t j = 0; j + 1 < i; ++j) {
525 v1_r[j] = scale1 * v1_r[j];
526 v1_i[j] = scale1 * v1_i[j];
527 }
528 for (idx_t j = i + 1; j < k; ++j) {
529 v1_r[j] = scale1 * v1_r[j];
530 v1_i[j] = scale1 * v1_i[j];
531 }
532 }
533
534 if (i > 1) {
535 idx_t ivrmax = iamax(slice(v1_r, range(0, i - 1)));
536 idx_t ivimax = iamax(slice(v1_i, range(0, i - 1)));
537 TT vrmax = abs(v1_r[ivrmax]);
538 TT vimax = abs(v1_i[ivimax]);
539
540 TT tmax = colN[i] + colN[i - 1]; // approximate 2xn block norm
541
542 TT xmaxr = abs(v1_r[i]) + abs(v1_r[i - 1]);
543 TT xmaxi = abs(v1_i[i]) + abs(v1_i[i - 1]);
544
547 TT scale2 = min(scale2r, scale2i);
548
549 if (scale2 != TT(1)) {
550 // Apply update with scaling
551 for (idx_t j = 0; j + 1 < i; ++j) {
552 // Real part
553 v1_r[j] = (scale2 * v1_r[j]) -
554 T11(j, i - 1) * (scale2 * v1_r[i - 1]) -
555 T11(j, i) * (scale2 * v1_r[i]);
556
557 // Imaginary part
558 v1_i[j] = (scale2 * v1_i[j]) -
559 T11(j, i - 1) * (scale2 * v1_i[i - 1]) -
560 T11(j, i) * (scale2 * v1_i[i]);
561 }
562
563 // Apply scale2 to all of v1
564 for (idx_t j = i - 1; j < k; ++j) {
565 v1_r[j] = scale2 * v1_r[j];
566 v1_i[j] = scale2 * v1_i[j];
567 }
568
569 scale *= scale2;
570 }
571 else {
572 // Update the right-hand side
573 for (idx_t j = 0; j + 1 < i; ++j) {
574 // Real part
575 v1_r[j] -= T11(j, i - 1) * v1_r[i - 1];
576 v1_r[j] -= T11(j, i) * v1_r[i];
577
578 // Imaginary part
579 v1_i[j] -= T11(j, i - 1) * v1_i[i - 1];
580 v1_i[j] -= T11(j, i) * v1_i[i];
581 }
582 }
583 }
584
585 ii += 2;
586 }
587 else {
588 // 1x1 block
589
590 // (v1_r[i] + i*v1_i[i]) / (T11(i, i) - (wr + i*wi))
591 TT scale1 = trevc_protectdiv(v1_r[i], v1_i[i], T11(i, i) - wr, -wi,
592 sf_min, sf_max);
593 TT a, b;
594 ladiv(scale1 * v1_r[i], scale1 * v1_i[i], T11(i, i) - wr, -wi, a,
595 b);
596 v1_r[i] = a;
597 v1_i[i] = b;
598
599 if (scale1 != TT(1)) {
600 scale *= scale1;
601 // Apply scale1 to all of v1
602 for (idx_t j = 0; j < i; ++j) {
603 v1_r[j] = scale1 * v1_r[j];
604 v1_i[j] = scale1 * v1_i[j];
605 }
606 for (idx_t j = i + 1; j < k; ++j) {
607 v1_r[j] = scale1 * v1_r[j];
608 v1_i[j] = scale1 * v1_i[j];
609 }
610 }
611
612 if (i > 0) {
613 idx_t ivrmax = iamax(slice(v1_r, range(0, i)));
614 idx_t ivimax = iamax(slice(v1_i, range(0, i)));
615 TT vrmax = abs(v1_r[ivrmax]);
616 TT vimax = abs(v1_i[ivimax]);
617
618 TT scale2r =
620 TT scale2i =
622 TT scale2 = min(scale2r, scale2i);
623
624 if (scale2 != TT(1)) {
625 for (idx_t j = 0; j < i; ++j) {
626 v1_r[j] =
627 (scale2 * v1_r[j]) - T11(j, i) * (scale2 * v1_r[i]);
628 v1_i[j] =
629 (scale2 * v1_i[j]) - T11(j, i) * (scale2 * v1_i[i]);
630 }
631 // Apply scale2 to all of v1
632 for (idx_t j = i; j < k; ++j) {
633 v1_r[j] = scale2 * v1_r[j];
634 v1_i[j] = scale2 * v1_i[j];
635 }
636
637 scale *= scale2;
638 }
639 else {
640 // Update the right-hand side
641 for (idx_t j = 0; j < i; ++j) {
642 v1_r[j] -= T11(j, i) * v1_r[i];
643 v1_i[j] -= T11(j, i) * v1_i[i];
644 }
645 }
646 }
647
648 ii += 1;
649 }
650 }
651
652 v_r[k] = scale * v_r[k];
653 v_r[k + 1] = scale * v_r[k + 1];
654 v_i[k] = scale * v_i[k];
655 v_i[k + 1] = scale * v_i[k + 1];
656}
657
658} // namespace tlapack
659
660#endif // TLAPACK_TREVC3_BACKSOLVE_HH
#define TLAPACK_VECTOR
Macro for tlapack::concepts::Vector compatible with C++17.
Definition concepts.hpp:906
#define TLAPACK_MATRIX
Macro for tlapack::concepts::Matrix compatible with C++17.
Definition concepts.hpp:896
void ladiv(const real_t &a, const real_t &b, const real_t &c, const real_t &d, real_t &p, real_t &q)
Performs complex division in real arithmetic.
Definition ladiv.hpp:39
size_type< vector_t > iamax(const vector_t &x, const IamaxOpts< abs_f > &opts)
Return .
Definition iamax.hpp:234
#define tlapack_check(cond)
Throw an error if cond is false.
Definition exceptionHandling.hpp:98
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D' ).
Definition arrayTraits.hpp:15
void trevc_backsolve_double(const matrix_T_t &T, vector_v_t &v_r, vector_v_t &v_i, const size_type< matrix_T_t > k, const vector_colN_t &colN)
Calculate the k-th right eigenvector pair of T using backsubstitution.
Definition trevc_backsolve.hpp:415
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
constexpr real_type< T > abs1(const T &x)
1-norm absolute value, |Re(x)| + |Im(x)|
Definition utils.hpp:133
T trevc_protectupdate(T ynorm, T tnorm, T xnorm, T sf_max)
Given the infinity norms of the matrices Y, T, and X, calculate a scaling factor scale in (0,...
Definition trevc_protect.hpp:138
T trevc_protectdiv(T a, T b, T sf_min, T sf_max)
Given two numbers a and b, calculate a scaling factor alpha in (0, 1] such that the division (a * alp...
Definition trevc_protect.hpp:37
void trevc_backsolve_single(const matrix_T_t &T, vector_v_t &v, const size_type< matrix_T_t > k, const vector_colN_t &colN)
Calculate the k-th right eigenvector of T using backsubstitution.
Definition trevc_backsolve.hpp:79
void trevc_2x2solve(T a, T b, T c, T d, T &x1, T &x2, T &scale, T sf_min, T sf_max)
Robustly solve a 2x2 system of equations (a b) (x1) = scale * (rhs1) (c d) (x2) (rhs2)
Definition trevc_protect.hpp:214