<T>LAPACK 0.1.2
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
trevc_protect.hpp
Go to the documentation of this file.
1
3// based on A. Schwarz et al., "Robust parallel eigenvector computation for the
4// non-symmetric eigenvalue problem"
5//
6// Copyright (c) 2025, University of Colorado Denver. All rights reserved.
7//
8// This file is part of <T>LAPACK.
9// <T>LAPACK is free software: you can redistribute it and/or modify it under
10// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
11
12#ifndef TLAPACK_TREVC_PROTECT_HH
13#define TLAPACK_TREVC_PROTECT_HH
14
16#include "tlapack/blas/asum.hpp"
18
19namespace tlapack {
20
36template <TLAPACK_SCALAR T, enable_if_t<is_real<T>, int> = 0>
38{
39 T alpha = T(1);
40
41 if (abs(b) < sf_min) {
42 if (abs(a) > abs(b) * sf_max) alpha = (abs(b) * sf_max) / abs(a);
43 }
44 else {
45 if (abs(b) < T(1))
46 if (abs(a) > abs(b) * sf_max) alpha = T(1) / abs(a);
47 }
48 return alpha;
49}
50
73template <TLAPACK_SCALAR T, enable_if_t<is_complex<T>, int> = 0>
84
109template <TLAPACK_SCALAR T, enable_if_t<is_real<T>, int> = 0>
112{
113 real_type<T> a1 = real_type<T>(2) * (abs(ar) + abs(ai));
114 real_type<T> b1 = abs(br) + abs(bi);
115
117}
118
137template <TLAPACK_SCALAR T, enable_if_t<is_real<T>, int> = 0>
139{
140 T xi = T(1);
141 if (xnorm <= T(1)) {
142 if (tnorm * xnorm > sf_max - ynorm) {
143 xi = T(0.5);
144 }
145 }
146 else {
147 if (tnorm > (sf_max - ynorm) / xnorm) {
148 xi = T(1) / (T(2) * xnorm);
149 }
150 }
151 return xi;
152}
153
165template <TLAPACK_SCALAR T, enable_if_t<is_real<T>, int> = 0>
167{
168 T xi = T(1);
169 if (a * b > 0)
170 if (abs(a) > (sf_max - abs(b))) xi = T(0.5);
171 return xi;
172}
173
185template <TLAPACK_SCALAR T, enable_if_t<is_complex<T>, int> = 0>
187{
190 return std::min<real_type<T>>(xir, xii);
191}
192
213template <TLAPACK_SCALAR T, enable_if_t<is_real<T>, int> = 0>
215 T a, T b, T c, T d, T& x1, T& x2, T& scale, T sf_min, T sf_max)
216{
217 //
218 // Step 1: compute LU factorization with complete pivoting
219 //
220 T abs_a = abs(a);
221 T abs_b = abs(b);
222 T abs_c = abs(c);
223 T abs_d = abs(d);
224
225 // 0 if a is largest
226 // 1 if b is largest
227 // 2 if c is largest
228 // 3 if d is largest
229 int ilargest = 0;
230 T largest = abs_a;
231 if (abs_b > largest) {
232 ilargest = 1;
233 largest = abs_b;
234 }
235 if (abs_c > largest) {
236 ilargest = 2;
237 largest = abs_c;
238 }
239 if (abs_d > largest) {
240 ilargest = 3;
241 largest = abs_d;
242 }
243
244 // Swap rows and columns to have the largest element in a
245 bool col_swapped;
246 if (ilargest == 0) {
247 // a is largest, do nothing
248 col_swapped = false;
249 }
250 else if (ilargest == 1) {
251 // b is largest
252
253 // swap columns
254 std::swap(a, b);
255 std::swap(c, d);
256 col_swapped = true;
257 }
258 else if (ilargest == 2) {
259 // c is largest
260
261 // swap rows
262 std::swap(a, c);
263 std::swap(b, d);
264 std::swap(x1, x2);
265 col_swapped = false;
266 }
267 else {
268 // d is largest
269
270 // swap columns
271 std::swap(a, b);
272 std::swap(c, d);
273 col_swapped = true;
274
275 // swap rows
276 std::swap(a, c);
277 std::swap(b, d);
278 std::swap(x1, x2);
279 }
280
281 // Step 2: LU factorization
282 // (a b) -> (1 0) (u00 u01)
283 // (c d) (l10 1) (0 u11)
284 // Note that we don't do overflow protection
285 // for c/a or d - l10 * u01 here
286 // I don't really see how that would work anyway
287
288 T l10 = c / a;
289 T u00 = a;
290 T u01 = b;
291 T u11 = d - l10 * u01;
292
293 // Step 3: Solve Ly = scale1 * rhs
294 T scale1 = trevc_protectupdate(abs(x2), abs(l10), abs(x1), sf_max);
295 T y1 = scale1 * x1;
296 T y2 = (scale1 * x2) - l10 * y1;
297
298 // Step 4: Solve Ux = y
300 y1 = scale2 * y1;
301 y2 = scale2 * y2;
302 x2 = y2 / u11;
303 T scale3 = trevc_protectupdate(abs(y1), abs(u01), abs(x2), sf_max);
304 x2 = scale3 * x2;
305 y1 = scale3 * y1;
306 y1 = y1 - u01 * x2;
308 x1 = (scale4 * y1) / u00;
309
310 // Step 5: undo column swap
311 if (col_swapped) std::swap(x1, x2);
312
314}
315
324template <TLAPACK_SCALAR T, enable_if_t<is_real<T>, int> = 0>
326 T ai,
327 T br,
328 T bi,
329 T cr,
330 T ci,
331 T dr,
332 T di,
333 T& x1r,
334 T& x1i,
335 T& x2r,
336 T& x2i,
337 T& scale,
338 T sf_min,
339 T sf_max)
340{
341 //
342 // Step 1: compute LU factorization with complete pivoting
343 //
344 T abs_a = abs(ar) + abs(ai);
345 T abs_b = abs(br) + abs(bi);
346 T abs_c = abs(cr) + abs(ci);
347 T abs_d = abs(dr) + abs(di);
348
349 // 0 if a is largest
350 // 1 if b is largest
351 // 2 if c is largest
352 // 3 if d is largest
353 int ilargest = 0;
354 T largest = abs_a;
355 if (abs_b > largest) {
356 ilargest = 1;
357 largest = abs_b;
358 }
359 if (abs_c > largest) {
360 ilargest = 2;
361 largest = abs_c;
362 }
363 if (abs_d > largest) {
364 ilargest = 3;
365 largest = abs_d;
366 }
367
368 // Swap rows and columns to have the largest element in a
369 bool col_swapped;
370 if (ilargest == 0) {
371 // a is largest, do nothing
372 col_swapped = false;
373 }
374 else if (ilargest == 1) {
375 // b is largest
376
377 // swap columns
378 std::swap(ar, br);
379 std::swap(ai, bi);
380 std::swap(cr, dr);
381 std::swap(ci, di);
382 col_swapped = true;
383 }
384 else if (ilargest == 2) {
385 // c is largest
386
387 // swap rows
388 std::swap(ar, cr);
389 std::swap(ai, ci);
390 std::swap(br, dr);
391 std::swap(bi, di);
392 std::swap(x1r, x2r);
393 std::swap(x1i, x2i);
394 col_swapped = false;
395 }
396 else {
397 // d is largest
398
399 // swap columns
400 std::swap(ar, br);
401 std::swap(ai, bi);
402 std::swap(cr, dr);
403 std::swap(ci, di);
404 col_swapped = true;
405
406 // swap rows
407 std::swap(ar, cr);
408 std::swap(ai, ci);
409 std::swap(br, dr);
410 std::swap(bi, di);
411 std::swap(x1r, x2r);
412 std::swap(x1i, x2i);
413 }
414
415 // Step 2: LU factorization
416 // (a b) -> (1 0) (u00 u01)
417 // (c d) (l10 1) (0 u11)
418 // Note that we don't do overflow protection
419 // for c/a or d - l10 * u01 here
420 // I don't really see how that would work anyway
421
422 // l10 = c / a
423 T l10r;
424 T l10i;
425 ladiv(cr, ci, ar, ai, l10r, l10i);
426 // u00 = a
427 T u00r = ar;
428 T u00i = ai;
429 // u01 = b
430 T u01r = br;
431 T u01i = bi;
432 // u11 = d - l10 * u01
433 T u11r = dr - (l10r * u01r - l10i * u01i);
434 T u11i = di - (l10r * u01i + l10i * u01r);
435
436 // Step 3: Solve Ly = scale1 * rhs
437 // y1 = scale1 * rhs1
438 // y2 = scale1 * rhs2 - l10 * (scale1 * rhs1)
439 T scale1 = trevc_protectupdate(abs(x2r) + abs(x2i), abs(l10r) + abs(l10i),
440 abs(x1r) + abs(x1i), sf_max);
441 T y1r = scale1 * x1r;
442 T y1i = scale1 * x1i; // We could actually assume that x1i is zero, but i
443 // left it in to be general
444 T y2r = (scale1 * x2r) - (l10r * y1r - l10i * y1i);
445 T y2i = (scale1 * x2i) - (l10r * y1i + l10i * y1r);
446
447 // Step 4: Solve Ux = y
448 // x2 = y2 / u11
450 y1r = scale2 * y1r;
451 y1i = scale2 * y1i;
452 y2r = scale2 * y2r;
453 y2i = scale2 * y2i;
454 ladiv(y2r, y2i, u11r, u11i, x2r, x2i);
455 // x1 = (y1 - u01 * x2) / u00
456 // temp = (y1 - u01 * x2)
457 T scale3 = trevc_protectupdate(abs(y1r) + abs(y1i), abs(u01r) + abs(u01i),
458 abs(x2r) + abs(x2i), sf_max);
459 x2r = scale3 * x2r;
460 x2i = scale3 * x2i;
461 y1r = scale3 * y1r;
462 y1i = scale3 * y1i;
463 T tempr = y1r - (u01r * x2r - u01i * x2i);
464 T tempi = y1i - (u01r * x2i + u01i * x2r);
465 // x1 = temp / u00
468
469 // Step 5: undo column swap
470 if (col_swapped) {
471 std::swap(x1r, x2r);
472 std::swap(x1i, x2i);
473 }
474
476}
477
505template <TLAPACK_MATRIX matrix_T_t, TLAPACK_VECTOR vector_colN_t>
507{
509
510 using idx_t = size_type<matrix_T_t>;
511 using TT = type_t<matrix_T_t>;
512 using real_t = real_type<TT>;
514
515 const idx_t n = nrows(T);
516
517 tlapack_check(ncols(T) == n);
518 tlapack_check(size(colN) == n);
519
520 if (norm == Norm::Inf) {
521 for (idx_t j = 0; j < n;) {
522 bool pair = false;
523 if (j + 1 < n) {
524 if (T(j + 1, j) != TT(0)) {
525 pair = true;
526 }
527 }
528
529 if (pair) {
530 // 2x2 block
531 real_t colnorm = real_t(0);
532 for (idx_t i = 0; i < j; ++i) {
533 colnorm = std::max<real_t>(
534 colnorm, abs1(T(i, j)) + abs1(T(i, j + 1)));
535 }
536 colN[j] = colnorm;
537 colN[j + 1] = real_t(0);
538 j += 2;
539 }
540 else {
541 // 1x1 block
542 real_t colnorm = real_t(0);
543 for (idx_t i = 0; i < j; ++i) {
544 colnorm = std::max<real_t>(colnorm, abs1(T(i, j)));
545 }
546 colN[j] = colnorm;
547 j += 1;
548 }
549 }
550 }
551 else if (norm == Norm::One) {
552 for (idx_t j = 0; j < n;) {
553 bool pair = false;
554 if (j + 1 < n) {
555 if (T(j + 1, j) != TT(0)) {
556 pair = true;
557 }
558 }
559
560 if (pair) {
561 // 2x2 block
564 for (idx_t i = 0; i < j; ++i) {
565 colnorm1 = colnorm1 + abs1(T(i, j));
566 colnorm2 = colnorm2 + abs1(T(i, j + 1));
567 }
568 colN[j] = colnorm1;
569 colN[j + 1] = colnorm2;
570 j += 2;
571 }
572 else {
573 // 1x1 block
574 real_t colnorm = real_t(0);
575 for (idx_t i = 0; i < j; ++i) {
576 colnorm = colnorm + abs1(T(i, j));
577 }
578 colN[j] = colnorm;
579 j += 1;
580 }
581 }
582 }
583}
584
585} // namespace tlapack
586
587#endif // TLAPACK_TREVC_PROTECT_HH
void ladiv(const real_t &a, const real_t &b, const real_t &c, const real_t &d, real_t &p, real_t &q)
Performs complex division in real arithmetic.
Definition ladiv.hpp:39
#define tlapack_check(cond)
Throw an error if cond is false.
Definition exceptionHandling.hpp:98
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D' ).
Definition arrayTraits.hpp:15
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
constexpr real_type< T > real(const T &x) noexcept
Extends std::real() to real datatypes.
Definition utils.hpp:71
constexpr real_type< T > abs1(const T &x)
1-norm absolute value, |Re(x)| + |Im(x)|
Definition utils.hpp:133
T trevc_protectupdate(T ynorm, T tnorm, T xnorm, T sf_max)
Given the infinity norms of the matrices Y, T, and X, calculate a scaling factor scale in (0,...
Definition trevc_protect.hpp:138
constexpr real_type< T > imag(const T &x) noexcept
Extends std::imag() to real datatypes.
Definition utils.hpp:86
T trevc_protectdiv(T a, T b, T sf_min, T sf_max)
Given two numbers a and b, calculate a scaling factor alpha in (0, 1] such that the division (a * alp...
Definition trevc_protect.hpp:37
Norm
Definition types.hpp:308
@ Inf
infinity norm of matrices
@ One
one norm
void trevc_colnorms(Norm norm, const matrix_T_t &T, vector_colN_t &colN)
Calculate the 1- or infinity-norms of the columns of the strictly upper triangular part of T.
Definition trevc_protect.hpp:506
void trevc_2x2solve(T a, T b, T c, T d, T &x1, T &x2, T &scale, T sf_min, T sf_max)
Robustly solve a 2x2 system of equations (a b) (x1) = scale * (rhs1) (c d) (x2) (rhs2)
Definition trevc_protect.hpp:214
T trevc_protectsum(T a, T b, T sf_max)
Calculate a scaling factor xi in [0.5, 1] such that the sum (xi * a) + (xi * b) does not overflow.
Definition trevc_protect.hpp:166