<T>LAPACK 0.1.2
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
trevc_forwardsolve.hpp
Go to the documentation of this file.
1
3// based on A. Schwarz et al., "Scalable eigenvector computation for the
4// non-symmetric eigenvalue problem"
5//
6// Copyright (c) 2025, University of Colorado Denver. All rights reserved.
7//
8// This file is part of <T>LAPACK.
9// <T>LAPACK is free software: you can redistribute it and/or modify it under
10// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
11
12#ifndef TLAPACK_TREVC_FORWARDSOLVE_HH
13#define TLAPACK_TREVC_FORWARDSOLVE_HH
14
16#include "tlapack/blas/asum.hpp"
19
20namespace tlapack {
21
76 const vector_colN_t& colN)
77{
78 using idx_t = size_type<matrix_T_t>;
79 using TT = type_t<matrix_T_t>;
80 using real_t = real_type<TT>;
82
83 const idx_t n = nrows(T);
84
85 tlapack_check(ncols(T) == n);
86 tlapack_check(size(v) == n);
87 tlapack_check(k < n);
88
89 real_t scale = real_t(1);
90
93
94 // Initialize v to [0, 1, -T( k, k+1:n-1 )]
95 for (idx_t i = 0; i < k; ++i) {
96 v[i] = TT(0);
97 }
98 v[k] = TT(1);
99 for (idx_t i = k + 1; i < n; ++i) {
100 v[i] = -T(k, i);
101 }
102
103 TT w = T(k, k); // eigenvalue
104
105 // Forward substitution to solve the system
106 auto T33 = slice(T, range(k + 1, n), range(k + 1, n));
107 auto v3 = slice(v, range(k + 1, n));
108 auto colN33 = slice(colN, range(k + 1, n));
109
110 // The matrix is real, so we need to consider potential
111 // 2x2 blocks for complex conjugate eigenvalue pairs
112 idx_t i = 0;
113 while (i < size(v3)) {
114 bool is_2x2_block = false;
115 if (i + 1 < size(v3)) {
116 if (T33(i + 1, i) != TT(0)) {
117 is_2x2_block = true;
118 }
119 }
120
121 if (is_2x2_block) {
122 // 2x2 block
123
124 // Step 1: update
125 idx_t ivmax = iamax(slice(v3, range(0, size(v3))));
126 real_t vmax = abs1(v3[ivmax]);
127
128 real_t tnorm1 = colN33[i];
129 real_t tnorm2 = colN33[i + 1];
133 trevc_protectupdate(abs(v3[i + 1]), tnorm2, vmax, sf_max);
135
136 if (scale1 != real_t(1)) {
137 // Apply scale1 to all of v3
138 for (idx_t j = 0; j < size(v3); ++j) {
139 v3[j] = scale1 * v3[j];
140 }
141 scale *= scale1;
142 }
143
144 for (idx_t j = 0; j < i; ++j) {
145 v3[i] -= T33(j, i) * v3[j];
146 v3[i + 1] -= T33(j, i + 1) * v3[j];
147 }
148
149 // Solve the 2x2 (transposed) system:
150 // [T33(i,i)-w T33(i+1,i) ] [v3[i] ] = [rhs1]
151 // [T33(i,i+1) T33(i+1,i+1)-w] [v3[i+1]] [rhs2]
152
153 TT a = T33(i, i) - w;
154 TT b = T33(i + 1, i);
155 TT c = T33(i, i + 1);
156 TT d = T33(i + 1, i + 1) - w;
157
158 TT scale2;
159 trevc_2x2solve(a, b, c, d, v3[i], v3[i + 1], scale2, sf_min,
160 sf_max);
161
162 // Apply scale2 to all of v3
163 if (scale2 != real_t(1)) {
164 scale *= scale2;
165 for (idx_t j = 0; j < i; ++j) {
166 v3[j] = scale2 * v3[j];
167 }
168 for (idx_t j = i + 2; j < size(v3); ++j) {
169 v3[j] = scale2 * v3[j];
170 }
171 }
172
173 i += 2;
174 }
175 else {
176 //
177 // 1x1 block
178 //
179
180 // Step 1: update
181 idx_t ivmax = iamax(slice(v3, range(0, size(v3))));
182 real_t vmax = abs1(v3[ivmax]);
183
184 real_t tnorm = colN33[i];
185 real_t scale1 =
187
188 if (scale1 != real_t(1)) {
189 // Apply scale1 to all of v3
190 for (idx_t j = 0; j < size(v3); ++j) {
191 v3[j] = scale1 * v3[j];
192 }
193 scale *= scale1;
194 }
195
196 for (idx_t j = 0; j < i; ++j) {
197 v3[i] -= T33(j, i) * v3[j];
198 }
199
200 // Step 2: division
201
202 real_t denom = T33(i, i) - w;
204 v3[i] = (scale2 * v3[i]) / denom;
205
206 if (scale2 != real_t(1)) {
207 // Apply scale2 to all of v3
208 for (idx_t j = 0; j < i; ++j) {
209 v3[j] = scale2 * v3[j];
210 }
211 for (idx_t j = i + 1; j < size(v3); ++j) {
212 v3[j] = scale2 * v3[j];
213 }
214 scale *= scale2;
215 }
216
217 i += 1;
218 }
219 }
220
221 v[k] = scale * v[k];
222}
223
227template <TLAPACK_MATRIX matrix_T_t,
233 vector_v_t& v,
235 const vector_colN_t& colN)
236{
237 using idx_t = size_type<matrix_T_t>;
238 using TT = type_t<matrix_T_t>;
239 using real_t = real_type<TT>;
241
242 const idx_t n = nrows(T);
243
244 tlapack_check(ncols(T) == n);
245 tlapack_check(size(v) == n);
246 tlapack_check(k < n);
247
250
251 real_t scale = real_t(1);
252
253 // Initialize v to [0, 1, -T( k, k+1:n-1 )]
254 for (idx_t i = 0; i < k; ++i) {
255 v[i] = TT(0);
256 }
257 v[k] = TT(1);
258 for (idx_t i = k + 1; i < n; ++i) {
259 v[i] = -conj(T(k, i));
260 }
261
262 TT w = T(k, k); // eigenvalue
263
264 // Forward substitution to solve the system
265 auto T33 = slice(T, range(k + 1, n), range(k + 1, n));
266 auto v3 = slice(v, range(k + 1, n));
267 auto colN33 = slice(colN, range(k + 1, n));
268
269 // The matrix is complex, so there are no two-by-two blocks to
270 // consider
271 for (idx_t i = 0; i < size(v3); ++i) {
272 // Step 1: update
273 idx_t ivmax = iamax(slice(v3, range(0, size(v3))));
274 real_t vmax = abs1(v3[ivmax]);
275
276 real_t tnorm = colN33[i];
278
279 if (scale1 != real_t(1)) {
280 // Apply scale1 to all of v3
281 for (idx_t j = 0; j < size(v3); ++j) {
282 v3[j] = scale1 * v3[j];
283 }
284 scale *= scale1;
285 }
286
287 for (idx_t j = 0; j < i; ++j) {
288 v3[i] -= conj(T33(j, i)) * v3[j];
289 }
290
291 // Step 2: division
292
293 TT denom = conj(T33(i, i) - w);
295 v3[i] = ladiv(scale2 * v3[i], denom);
296
297 if (scale2 != real_t(1)) {
298 // Apply scale2 to all of v3
299 for (idx_t j = 0; j < i; ++j) {
300 v3[j] = scale2 * v3[j];
301 }
302 for (idx_t j = i + 1; j < size(v3); ++j) {
303 v3[j] = scale2 * v3[j];
304 }
305 scale *= scale2;
306 }
307 }
308
309 v[k] = scale * v[k];
310}
311
367template <TLAPACK_MATRIX matrix_T_t,
375 const vector_colN_t& colN)
376{
377 using idx_t = size_type<matrix_T_t>;
378 using TT = type_t<matrix_T_t>;
380
381 const idx_t n = nrows(T);
382
383 tlapack_check(ncols(T) == n);
384 tlapack_check(size(v_r) == n);
385 tlapack_check(size(v_i) == n);
386 tlapack_check(k < n);
387
388 const TT sf_max = safe_max<TT>();
389 const TT sf_min = safe_min<TT>();
390
391 TT alpha = T(k, k);
392 TT beta = T(k, k + 1);
393 TT gamma = T(k + 1, k);
394
395 // real part of eigenvalue
396 TT wr = alpha;
397 // imaginary part of eigenvalue
398 TT wi = sqrt(abs(beta)) * sqrt(abs(gamma));
399
400 // Depending of whether beta or gamma is bigger, we set y2 = 1 or y3 = i
401 TT y2, y3;
402 if (abs(gamma) >= abs(beta)) {
403 y2 = TT(1);
404 y3 = -wi / gamma;
405 }
406 else {
407 y2 = -wi / beta;
408 y3 = -TT(1);
409 }
410
411 TT scale = TT(1);
412
413 // Initialize v_real and v_imag to -[y2; i * y3]**H * T(k:k+1, k+2:n-1)
414 for (idx_t i = 0; i < k; ++i) {
415 v_r[i] = TT(0);
416 v_i[i] = TT(0);
417 }
418 v_r[k] = y2;
419 v_i[k] = TT(0);
420 v_r[k + 1] = TT(0);
421 v_i[k + 1] = y3;
422 for (idx_t i = k + 2; i < n; ++i) {
423 v_r[i] = -T(k, i) * y2;
424 v_i[i] = -T(k + 1, i) * y3;
425 }
426
427 // Now do a complex forward substitution using the shift wr + i*wi
428 // but without forming complex numbers explicitly
429 // on top of that, we need to take care of potential 2x2 blocks in T44
430 auto T44 = slice(T, range(k + 2, n), range(k + 2, n));
431 auto v4_r = slice(v_r, range(k + 2, n));
432 auto v4_i = slice(v_i, range(k + 2, n));
433 auto colN44 = slice(colN, range(k + 2, n));
434
435 idx_t i = 0;
436 while (i < size(v4_r)) {
437 bool is_2x2_block = false;
438 if (i + 1 < size(v4_r)) {
439 if (T44(i + 1, i) != TT(0)) {
440 is_2x2_block = true;
441 }
442 }
443
444 if (is_2x2_block) {
445 // 2x2 block
446
447 // Step 1: update
448 idx_t ivrmax = iamax(slice(v4_r, range(0, size(v4_r))));
450 idx_t ivimax = iamax(slice(v4_i, range(0, size(v4_i))));
452
453 TT tnorm1 = colN44[i];
454 TT tnorm2 = colN44[i + 1];
455 TT scale1ar =
457 TT scale1ai =
459 TT scale1a = min(scale1ar, scale1ai);
460 TT scale1br =
462 TT scale1bi =
464 TT scale1b = min(scale1br, scale1bi);
465 TT scale1 = min(scale1a, scale1b);
466
467 if (scale1 != TT(1)) {
468 // Apply scale1 to all of v4_r and v4_i
469 for (idx_t j = 0; j < size(v4_r); ++j) {
470 v4_r[j] = scale1 * v4_r[j];
471 v4_i[j] = scale1 * v4_i[j];
472 }
473 scale *= scale1;
474 }
475
476 for (idx_t j = 0; j < i; ++j) {
477 v4_r[i] -= T44(j, i) * v4_r[j];
478 v4_i[i] -= T44(j, i) * v4_i[j];
479 v4_r[i + 1] -= T44(j, i + 1) * v4_r[j];
480 v4_i[i + 1] -= T44(j, i + 1) * v4_i[j];
481 }
482
483 // Solve the (transposed) complex 2x2 system:
484 // y**H
485 // *
486 // [T44(i,i)- (wr + i*wi) T44(i,i+1) ]
487 // [T44(i+1, i) T44(i+1, i+1)- (wr + i*wi)]
488 // =
489 // [v4_r[i] + i*v4_i[i] ]
490 // [v4_r[i+1] + i*v4_i[i+1] ]
491
492 TT a11r = T44(i, i) - wr;
493 TT a11i = wi;
494 // a12 and a21 are switched to transpose the system
495 TT a12 = T44(i + 1, i);
496 TT a21 = T44(i, i + 1);
497 TT a22r = T44(i + 1, i + 1) - wr;
498 TT a22i = wi;
499
500 TT scale2;
501 trevc_2x2solve(a11r, a11i, a12, TT(0), a21, TT(0), a22r, a22i,
502 v4_r[i], v4_i[i], v4_r[i + 1], v4_i[i + 1], scale2,
503 sf_min, sf_max);
504
505 if (scale2 != TT(1)) {
506 // Apply scale2 to all of v4_r and v4_i
507 scale *= scale2;
508 for (idx_t j = 0; j < i; ++j) {
509 v4_r[j] = scale2 * v4_r[j];
510 v4_i[j] = scale2 * v4_i[j];
511 }
512 for (idx_t j = i + 2; j < size(v4_r); ++j) {
513 v4_r[j] = scale2 * v4_r[j];
514 v4_i[j] = scale2 * v4_i[j];
515 }
516 }
517
518 i += 2;
519 }
520 else {
521 // 1x1 block
522
523 // Step 1: update
524 idx_t ivrmax = iamax(slice(v4_r, range(0, size(v4_r))));
526 idx_t ivimax = iamax(slice(v4_i, range(0, size(v4_i))));
528
529 TT tnorm = colN44[i];
530 TT scale1a =
532 TT scale1b =
534 TT scale1 = min(scale1a, scale1b);
535
536 if (scale1 != TT(1)) {
537 // Apply scale1 to all of v4_r and v4_i
538 for (idx_t j = 0; j < size(v4_r); ++j) {
539 v4_r[j] = scale1 * v4_r[j];
540 v4_i[j] = scale1 * v4_i[j];
541 }
542 scale *= scale1;
543 }
544
545 for (idx_t j = 0; j < i; ++j) {
546 v4_r[i] -= T44(j, i) * v4_r[j];
547 v4_i[i] -= T44(j, i) * v4_i[j];
548 }
549
550 // Do the complex division:
551 // (v4_r[i] + i*v4_i[i]) / (T44(i, i) - (wr + i*wi))
552 TT scale2 = trevc_protectdiv(v4_r[i], v4_i[i], T44(i, i) - wr, wi,
553 sf_min, sf_max);
554
555 TT tr, ti;
556 ladiv(scale2 * v4_r[i], scale2 * v4_i[i], T44(i, i) - wr, wi, tr,
557 ti);
558 v4_r[i] = tr;
559 v4_i[i] = ti;
560
561 if (scale2 != TT(1)) {
562 // Apply scale2 to all of v4_r and v4_i
563 for (idx_t j = 0; j < i; ++j) {
564 v4_r[j] = scale2 * v4_r[j];
565 v4_i[j] = scale2 * v4_i[j];
566 }
567 for (idx_t j = i + 1; j < size(v4_r); ++j) {
568 v4_r[j] = scale2 * v4_r[j];
569 v4_i[j] = scale2 * v4_i[j];
570 }
571 scale *= scale2;
572 }
573
574 i += 1;
575 }
576 }
577
578 v_r[k] = scale * v_r[k];
579 v_i[k] = scale * v_i[k];
580 v_r[k + 1] = scale * v_r[k + 1];
581 v_i[k + 1] = scale * v_i[k + 1];
582}
583
584} // namespace tlapack
585
586#endif // TLAPACK_TREVC_FORWARDSOLVE_HH
#define TLAPACK_VECTOR
Macro for tlapack::concepts::Vector compatible with C++17.
Definition concepts.hpp:906
#define TLAPACK_MATRIX
Macro for tlapack::concepts::Matrix compatible with C++17.
Definition concepts.hpp:896
void ladiv(const real_t &a, const real_t &b, const real_t &c, const real_t &d, real_t &p, real_t &q)
Performs complex division in real arithmetic.
Definition ladiv.hpp:39
size_type< vector_t > iamax(const vector_t &x, const IamaxOpts< abs_f > &opts)
Return .
Definition iamax.hpp:234
#define tlapack_check(cond)
Throw an error if cond is false.
Definition exceptionHandling.hpp:98
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D' ).
Definition arrayTraits.hpp:15
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
void trevc_forwardsolve_double(const matrix_T_t &T, vector_v_t &v_r, vector_v_t &v_i, const size_type< matrix_T_t > k, const vector_colN_t &colN)
Calculate the k-th left eigenvector pair of T using forward substitution.
Definition trevc_forwardsolve.hpp:371
constexpr T conj(const T &x) noexcept
Extends std::conj() to real datatypes.
Definition utils.hpp:100
void trevc_forwardsolve_single(const matrix_T_t &T, vector_v_t &v, const size_type< matrix_T_t > k, const vector_colN_t &colN)
Calculate the k-th left eigenvector of T using forward substitution.
Definition trevc_forwardsolve.hpp:73
constexpr real_type< T > abs1(const T &x)
1-norm absolute value, |Re(x)| + |Im(x)|
Definition utils.hpp:133
T trevc_protectupdate(T ynorm, T tnorm, T xnorm, T sf_max)
Given the infinity norms of the matrices Y, T, and X, calculate a scaling factor scale in (0,...
Definition trevc_protect.hpp:138
T trevc_protectdiv(T a, T b, T sf_min, T sf_max)
Given two numbers a and b, calculate a scaling factor alpha in (0, 1] such that the division (a * alp...
Definition trevc_protect.hpp:37
void trevc_2x2solve(T a, T b, T c, T d, T &x1, T &x2, T &scale, T sf_min, T sf_max)
Robustly solve a 2x2 system of equations (a b) (x1) = scale * (rhs1) (c d) (x2) (rhs2)
Definition trevc_protect.hpp:214