tlapack/schur__swap_8hpp_source.html

//

// Copyright (c) 2025, University of Colorado Denver. All rights reserved.

//

// This file is part of <T>LAPACK.

// <T>LAPACK is free software: you can redistribute it and/or modify it under

// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.


#ifndef TLAPACK_SCHUR_SWAP_HH

#define TLAPACK_SCHUR_SWAP_HH


#include "tlapack/base/utils.hpp"

#include "tlapack/blas/rot.hpp"

#include "tlapack/blas/rotg.hpp"

#include "tlapack/blas/swap.hpp"

#include "tlapack/lapack/lahqr_schur22.hpp"

#include "tlapack/lapack/lange.hpp"

#include "tlapack/lapack/larfg.hpp"

#include "tlapack/lapack/lasy2.hpp"


namespace tlapack {


/** Swaps two adjacent diagonal blocks of a real square matrix A, each of
 * order 1 or 2, using orthogonal transformations (real-valued overload).
 *
 * The first block starts at row/column j0 and has order n1; the second block
 * follows immediately and has order n2. On a successful return the block of
 * order n2 starts at j0 and the block of order n1 starts at j0 + n2. Any 2x2
 * block left on the diagonal is then passed through lahqr_schur22 to put it
 * in standard form.
 *
 * @param[in] want_q
 *      If true, every transformation applied to A from the right is also
 *      applied to the columns of Q.
 * @param[in,out] A
 *      n-by-n matrix, updated in place. The existing comments refer to its
 *      diagonal blocks as eigenvalue/Schur blocks, i.e. A is presumably in
 *      real Schur form -- this is not checked here.
 * @param[in,out] Q
 *      n-by-n matrix. Its dimensions are checked even when want_q is false;
 *      its entries are only accessed when want_q is true.
 * @param[in] j0
 *      Index of the first row/column of the first block.
 * @param[in] n1
 *      Order of the first block, must be 1 or 2.
 * @param[in] n2
 *      Order of the second block, must be 1 or 2.
 *
 * @return 0 on success.
 * @return 1 if a swap of two 2x2 blocks is rejected because the trial swap,
 *      performed on a local copy, leaves entries larger than the threshold
 *      in the lower-left 2x2 block. That branch has not written to A or Q
 *      when it returns 1.
 */
template <TLAPACK_CSMATRIX matrix_t,
          enable_if_t<is_real<type_t<matrix_t>>, bool> = true>

int schur_swap(bool want_q,
               matrix_t& A,
               matrix_t& Q,
               const size_type<matrix_t>& j0,
               const size_type<matrix_t>& n1,
               const size_type<matrix_t>& n2)
{
    using idx_t = size_type<matrix_t>;
    using T = type_t<matrix_t>;
    using range = pair<idx_t, idx_t>;

    // Functors that wrap a raw local array as a small fixed-size matrix of
    // the same kind as matrix_t (used below for the work matrices B, D, V)
    CreateStatic<matrix_t, 3, 2> new_3by2_matrix;
    CreateStatic<matrix_t, 4, 4> new_4by4_matrix;
    CreateStatic<matrix_t, 4, 2> new_4by2_matrix;

    const idx_t n = ncols(A);
    const T zero(0);
    const T ten(10);

    // Argument checks: A and Q are n-by-n, both blocks lie inside A, and
    // each block has order 1 or 2
    tlapack_check(nrows(A) == n);
    tlapack_check(nrows(Q) == n);
    tlapack_check(ncols(Q) == n);
    tlapack_check(0 <= j0);
    tlapack_check(j0 + n1 + n2 <= n);
    tlapack_check(n1 == 1 or n1 == 2);
    tlapack_check(n2 == 1 or n2 == 2);

    // Indices of the rows/columns following j0 (j2 and j3 are only used by
    // branches where n1 + n2 is large enough for them to be in range)
    const idx_t j1 = j0 + 1;
    const idx_t j2 = j0 + 2;
    const idx_t j3 = j0 + 3;

    // Check if the 2x2 eigenvalue blocks consist of 2 1x1 blocks
    // If so, treat them separately

    // First block is really two 1x1 blocks: move the second block past the
    // 1x1 block at j1, then past the 1x1 block at j0.
    if (n1 == 2)
        if (A(j1, j0) == zero) {
            // only 2x2 swaps can fail, so we don't need to check for error
            // (each recursive call has one block of order 1)
            schur_swap(want_q, A, Q, j1, (idx_t)1, n2);
            schur_swap(want_q, A, Q, j0, (idx_t)1, n2);
            return 0;
        }
    // Second block is really two 1x1 blocks: move its first 1x1 block in
    // front of the first block, then do the same with its second 1x1 block
    // (the first block has shifted down by one position in between).
    if (n2 == 2)
        if (A(j0 + n1 + 1, j0 + n1) == zero) {
            // only 2x2 swaps can fail, so we don't need to check for error
            // (each recursive call has one block of order 1)
            schur_swap(want_q, A, Q, j0, n1, (idx_t)1);
            schur_swap(want_q, A, Q, j1, n1, (idx_t)1);
            return 0;
        }

    if (n1 == 1 and n2 == 1) {
        //
        // Swap two 1-by-1 blocks.
        //
        const T t00 = A(j0, j0);
        const T t11 = A(j1, j1);
        //
        // Determine the transformation to perform the interchange:
        // a plane rotation (cs, sn) generated by rotg from the pair
        // (A(j0, j1), t11 - t00). rotg takes its first two arguments by
        // reference, hence the temporaries.
        //
        T cs, sn;
        T temp = A(j0, j1);
        T temp2 = t11 - t00;
        rotg(temp, temp2, cs, sn);

        // The diagonal entries are exchanged directly rather than computed
        // by applying the rotation; A(j0, j1) is left as it was.
        A(j1, j1) = t00;
        A(j0, j0) = t11;

        // Apply transformation from the left
        // (rows j0 and j1, columns j2 .. n-1)
        if (j2 < n) {
            auto row1 = slice(A, j0, range{j2, n});
            auto row2 = slice(A, j1, range{j2, n});
            rot(row1, row2, cs, sn);
        }
        // Apply transformation from the right
        // (columns j0 and j1, rows 0 .. j0-1)
        if (j0 > 0) {
            auto col1 = slice(A, range{0, j0}, j0);
            auto col2 = slice(A, range{0, j0}, j1);
            rot(col1, col2, cs, sn);
        }
        // Accumulate the rotation into Q. Despite their names, row1/row2
        // are the full columns j0 and j1 of Q.
        if (want_q) {
            auto row1 = col(Q, j0);
            auto row2 = col(Q, j1);
            rot(row1, row2, cs, sn);
        }
    }
    if (n1 == 1 and n2 == 2) {
        //
        // Swap 1-by-1 block with 2-by-2 block
        //

        // B is a 3x2 work matrix built from columns j1 and j2 of rows
        // j0 .. j2 of A, with A(j0, j0) subtracted from the two entries that
        // come from the diagonal of A.
        T B_[3 * 2];
        auto B = new_3by2_matrix(B_);
        B(0, 0) = A(j0, j1);
        B(1, 0) = A(j1, j1) - A(j0, j0);
        B(2, 0) = A(j2, j1);
        B(0, 1) = A(j0, j2);
        B(1, 1) = A(j1, j2);
        B(2, 1) = A(j2, j2) - A(j0, j0);

        // Make B upper triangular with two reflectors generated by larfg:
        // v1 acts on all 3 rows (from column 0), v2 on rows 1 .. 2 (from
        // column 1). Below, each reflector is applied as
        // x <- x - tau * u * (u^T x) with u = (1, v[1], v[2], ...), i.e. the
        // leading entry of the reflector vector is taken to be 1.
        T tau1, tau2;
        auto v1 = slice(B, range{0, 3}, 0);
        auto v2 = slice(B, range{1, 3}, 1);
        larfg(FORWARD, COLUMNWISE_STORAGE, v1, tau1);
        // Apply the first reflector to column 1 of B before generating the
        // second one
        const T sum = B(0, 1) + v1[1] * B(1, 1) + v1[2] * B(2, 1);
        B(0, 1) = B(0, 1) - sum * tau1;
        B(1, 1) = B(1, 1) - sum * tau1 * v1[1];
        B(2, 1) = B(2, 1) - sum * tau1 * v1[2];
        larfg(FORWARD, COLUMNWISE_STORAGE, v2, tau2);

        //
        // Apply reflections to A and Q
        //

        // Reflections from the left
        // (rows j0 .. j2, columns j0 .. n-1; first reflector, then second)
        for (idx_t j = j0; j < n; ++j) {
            T sum = A(j0, j) + v1[1] * A(j1, j) + v1[2] * A(j2, j);
            A(j0, j) = A(j0, j) - sum * tau1;
            A(j1, j) = A(j1, j) - sum * tau1 * v1[1];
            A(j2, j) = A(j2, j) - sum * tau1 * v1[2];

            sum = A(j1, j) + v2[1] * A(j2, j);
            A(j1, j) = A(j1, j) - sum * tau2;
            A(j2, j) = A(j2, j) - sum * tau2 * v2[1];
        }
        // Reflections from the right
        // (columns j0 .. j2, rows 0 .. j2)
        for (idx_t j = 0; j < j3; ++j) {
            T sum = A(j, j0) + v1[1] * A(j, j1) + v1[2] * A(j, j2);
            A(j, j0) = A(j, j0) - sum * tau1;
            A(j, j1) = A(j, j1) - sum * tau1 * v1[1];
            A(j, j2) = A(j, j2) - sum * tau1 * v1[2];

            sum = A(j, j1) + v2[1] * A(j, j2);
            A(j, j1) = A(j, j1) - sum * tau2;
            A(j, j2) = A(j, j2) - sum * tau2 * v2[1];
        }

        // Same right-hand update on columns j0 .. j2 of Q, all n rows
        if (want_q) {
            for (idx_t j = 0; j < n; ++j) {
                T sum = Q(j, j0) + v1[1] * Q(j, j1) + v1[2] * Q(j, j2);
                Q(j, j0) = Q(j, j0) - sum * tau1;
                Q(j, j1) = Q(j, j1) - sum * tau1 * v1[1];
                Q(j, j2) = Q(j, j2) - sum * tau1 * v1[2];

                sum = Q(j, j1) + v2[1] * Q(j, j2);
                Q(j, j1) = Q(j, j1) - sum * tau2;
                Q(j, j2) = Q(j, j2) - sum * tau2 * v2[1];
            }
        }

        // The 2x2 block now occupies j0 .. j1 and the 1x1 block sits at j2:
        // set the entries of row j2 to the left of the diagonal to exact
        // zeros.
        A(j2, j0) = zero;
        A(j2, j1) = zero;
    }
    if (n1 == 2 and n2 == 1) {
        //
        // Swap 2-by-2 block with 1-by-1 block
        //

        // Counterpart of the previous branch with the index order reversed
        // (j2, j1, j0): B is built from rows j1 and j0 of A, with A(j2, j2)
        // subtracted from the two entries that come from the diagonal of A.
        T B_[3 * 2];
        auto B = new_3by2_matrix(B_);
        B(0, 0) = A(j1, j2);
        B(1, 0) = A(j1, j1) - A(j2, j2);
        B(2, 0) = A(j1, j0);
        B(0, 1) = A(j0, j2);
        B(1, 1) = A(j0, j1);
        B(2, 1) = A(j0, j0) - A(j2, j2);

        // Make B upper triangular (same two-reflector scheme as in the
        // 1-by-1 / 2-by-2 branch)
        T tau1, tau2;
        auto v1 = slice(B, range{0, 3}, 0);
        auto v2 = slice(B, range{1, 3}, 1);
        larfg(FORWARD, COLUMNWISE_STORAGE, v1, tau1);
        const T sum = B(0, 1) + v1[1] * B(1, 1) + v1[2] * B(2, 1);
        B(0, 1) = B(0, 1) - sum * tau1;
        B(1, 1) = B(1, 1) - sum * tau1 * v1[1];
        B(2, 1) = B(2, 1) - sum * tau1 * v1[2];
        larfg(FORWARD, COLUMNWISE_STORAGE, v2, tau2);

        //
        // Apply reflections to A and Q
        //

        // Reflections from the left
        // (the reflector components pair with rows j2, j1, j0 in that order)
        for (idx_t j = j0; j < n; ++j) {
            T sum = A(j2, j) + v1[1] * A(j1, j) + v1[2] * A(j0, j);
            A(j2, j) = A(j2, j) - sum * tau1;
            A(j1, j) = A(j1, j) - sum * tau1 * v1[1];
            A(j0, j) = A(j0, j) - sum * tau1 * v1[2];

            sum = A(j1, j) + v2[1] * A(j0, j);
            A(j1, j) = A(j1, j) - sum * tau2;
            A(j0, j) = A(j0, j) - sum * tau2 * v2[1];
        }
        // Reflections from the right
        // (columns j2, j1, j0 in that order, rows 0 .. j2)
        for (idx_t j = 0; j < j3; ++j) {
            T sum = A(j, j2) + v1[1] * A(j, j1) + v1[2] * A(j, j0);
            A(j, j2) = A(j, j2) - sum * tau1;
            A(j, j1) = A(j, j1) - sum * tau1 * v1[1];
            A(j, j0) = A(j, j0) - sum * tau1 * v1[2];

            sum = A(j, j1) + v2[1] * A(j, j0);
            A(j, j1) = A(j, j1) - sum * tau2;
            A(j, j0) = A(j, j0) - sum * tau2 * v2[1];
        }

        // Same right-hand update on columns j2, j1, j0 of Q, all n rows
        if (want_q) {
            for (idx_t j = 0; j < n; ++j) {
                T sum = Q(j, j2) + v1[1] * Q(j, j1) + v1[2] * Q(j, j0);
                Q(j, j2) = Q(j, j2) - sum * tau1;
                Q(j, j1) = Q(j, j1) - sum * tau1 * v1[1];
                Q(j, j0) = Q(j, j0) - sum * tau1 * v1[2];

                sum = Q(j, j1) + v2[1] * Q(j, j0);
                Q(j, j1) = Q(j, j1) - sum * tau2;
                Q(j, j0) = Q(j, j0) - sum * tau2 * v2[1];
            }
        }

        // The 1x1 block now sits at j0 and the 2x2 block occupies j1 .. j2:
        // set the entries of column j0 below the diagonal to exact zeros.
        A(j1, j0) = zero;
        A(j2, j0) = zero;
    }
    if (n1 == 2 and n2 == 2) {
        // Swap 2-by-2 block with 2-by-2 block. This is the only branch that
        // can return 1: the swap is first tried on D, a local copy of the
        // 4x4 diagonal window of A, and only applied to A and Q if accepted.
        T D_[4 * 4];
        auto D = new_4by4_matrix(D_);

        auto AD_slice = slice(A, range{j0, j0 + 4}, range{j0, j0 + 4});
        // NOTE(review): lacpy is called here; confirm that one of this
        // header's includes provides tlapack/lapack/lacpy.hpp.
        lacpy(GENERAL, AD_slice, D);
        auto dnorm = lange(MAX_NORM, D);

        // Acceptance threshold: max(10 * ulp * max-norm of D, safe_min / ulp)
        const T eps = ulp<T>();
        const T small_num = safe_min<T>() / eps;
        T thresh = max(ten * eps * dnorm, small_num);
        // Note: max() may not propagate NaNs.

        // V is a 4x2 work matrix. Its top 2x2 part X receives the solution
        // computed by lasy2 from the diagonal blocks TL, TR and the
        // off-diagonal block B of D; its bottom 2x2 part is set to
        // -scale * I, where scale is the factor returned by lasy2.
        // lasy2's return value and xnorm are not used afterwards.
        T V_[4 * 2];
        auto V = new_4by2_matrix(V_);
        auto X = slice(V, range{0, 2}, range{0, 2});
        auto TL = slice(D, range{0, 2}, range{0, 2});
        auto TR = slice(D, range{2, 4}, range{2, 4});
        auto B = slice(D, range{0, 2}, range{2, 4});
        T scale, xnorm;
        lasy2(NO_TRANS, NO_TRANS, -1, TL, TR, B, scale, X, xnorm);

        V(2, 0) = -scale;
        V(2, 1) = zero;
        V(3, 0) = zero;
        V(3, 1) = -scale;

        // Make V upper triangular with two reflectors: v1 acts on all 4 rows
        // (from column 0), v2 on rows 1 .. 3 (from column 1). As in the
        // other branches, each reflector is applied with an implicit
        // leading entry of 1.
        T tau1, tau2;
        auto v1 = slice(V, range{0, 4}, 0);
        auto v2 = slice(V, range{1, 4}, 1);
        larfg(FORWARD, COLUMNWISE_STORAGE, v1, tau1);
        // Apply the first reflector to column 1 of V before generating the
        // second one
        const T sum =
            V(0, 1) + v1[1] * V(1, 1) + v1[2] * V(2, 1) + v1[3] * V(3, 1);
        V(0, 1) = V(0, 1) - sum * tau1;
        V(1, 1) = V(1, 1) - sum * tau1 * v1[1];
        V(2, 1) = V(2, 1) - sum * tau1 * v1[2];
        V(3, 1) = V(3, 1) - sum * tau1 * v1[3];
        larfg(FORWARD, COLUMNWISE_STORAGE, v2, tau2);

        // Apply reflections to D to check error
        // (from the left, column by column ...)
        for (idx_t j = 0; j < 4; ++j) {
            T sum =
                D(0, j) + v1[1] * D(1, j) + v1[2] * D(2, j) + v1[3] * D(3, j);
            D(0, j) = D(0, j) - sum * tau1;
            D(1, j) = D(1, j) - sum * tau1 * v1[1];
            D(2, j) = D(2, j) - sum * tau1 * v1[2];
            D(3, j) = D(3, j) - sum * tau1 * v1[3];

            sum = D(1, j) + v2[1] * D(2, j) + v2[2] * D(3, j);
            D(1, j) = D(1, j) - sum * tau2;
            D(2, j) = D(2, j) - sum * tau2 * v2[1];
            D(3, j) = D(3, j) - sum * tau2 * v2[2];
        }
        // (... then from the right, row by row)
        for (idx_t j = 0; j < 4; ++j) {
            T sum =
                D(j, 0) + v1[1] * D(j, 1) + v1[2] * D(j, 2) + v1[3] * D(j, 3);
            D(j, 0) = D(j, 0) - sum * tau1;
            D(j, 1) = D(j, 1) - sum * tau1 * v1[1];
            D(j, 2) = D(j, 2) - sum * tau1 * v1[2];
            D(j, 3) = D(j, 3) - sum * tau1 * v1[3];

            sum = D(j, 1) + v2[1] * D(j, 2) + v2[2] * D(j, 3);
            D(j, 1) = D(j, 1) - sum * tau2;
            D(j, 2) = D(j, 2) - sum * tau2 * v2[1];
            D(j, 3) = D(j, 3) - sum * tau2 * v2[2];
        }

        // Reject the swap if any entry of the lower-left 2x2 block of the
        // transformed copy exceeds the threshold. A and Q have not been
        // written to in this branch at this point.
        if (max(max(abs(D(2, 0)), abs(D(2, 1))),
                max(abs(D(3, 0)), abs(D(3, 1)))) > thresh)
            return 1;

        // Swap accepted: apply the same two reflectors to A and Q.

        // Reflections from the left
        // (rows j0 .. j3, columns j0 .. n-1)
        for (idx_t j = j0; j < n; ++j) {
            T sum = A(j0, j) + v1[1] * A(j1, j) + v1[2] * A(j2, j) +
                    v1[3] * A(j3, j);
            A(j0, j) = A(j0, j) - sum * tau1;
            A(j1, j) = A(j1, j) - sum * tau1 * v1[1];
            A(j2, j) = A(j2, j) - sum * tau1 * v1[2];
            A(j3, j) = A(j3, j) - sum * tau1 * v1[3];

            sum = A(j1, j) + v2[1] * A(j2, j) + v2[2] * A(j3, j);
            A(j1, j) = A(j1, j) - sum * tau2;
            A(j2, j) = A(j2, j) - sum * tau2 * v2[1];
            A(j3, j) = A(j3, j) - sum * tau2 * v2[2];
        }
        // Reflections from the right
        // (columns j0 .. j3, rows 0 .. j3)
        for (idx_t j = 0; j < j0 + 4; ++j) {
            T sum = A(j, j0) + v1[1] * A(j, j1) + v1[2] * A(j, j2) +
                    v1[3] * A(j, j3);
            A(j, j0) = A(j, j0) - sum * tau1;
            A(j, j1) = A(j, j1) - sum * tau1 * v1[1];
            A(j, j2) = A(j, j2) - sum * tau1 * v1[2];
            A(j, j3) = A(j, j3) - sum * tau1 * v1[3];

            sum = A(j, j1) + v2[1] * A(j, j2) + v2[2] * A(j, j3);
            A(j, j1) = A(j, j1) - sum * tau2;
            A(j, j2) = A(j, j2) - sum * tau2 * v2[1];
            A(j, j3) = A(j, j3) - sum * tau2 * v2[2];
        }

        // Same right-hand update on columns j0 .. j3 of Q, all n rows
        if (want_q) {
            for (idx_t j = 0; j < n; ++j) {
                T sum = Q(j, j0) + v1[1] * Q(j, j1) + v1[2] * Q(j, j2) +
                        v1[3] * Q(j, j3);
                Q(j, j0) = Q(j, j0) - sum * tau1;
                Q(j, j1) = Q(j, j1) - sum * tau1 * v1[1];
                Q(j, j2) = Q(j, j2) - sum * tau1 * v1[2];
                Q(j, j3) = Q(j, j3) - sum * tau1 * v1[3];

                sum = Q(j, j1) + v2[1] * Q(j, j2) + v2[2] * Q(j, j3);
                Q(j, j1) = Q(j, j1) - sum * tau2;
                Q(j, j2) = Q(j, j2) - sum * tau2 * v2[1];
                Q(j, j3) = Q(j, j3) - sum * tau2 * v2[2];
            }
        }

        // Set the lower-left 2x2 block of the 4x4 window to exact zeros
        // (its size in the trial copy was checked against thresh above)
        A(j2, j0) = zero;
        A(j2, j1) = zero;
        A(j3, j0) = zero;
        A(j3, j1) = zero;
    }

    // Standardize the 2x2 Schur blocks (if any)

    // The block of order n2 now starts at j0.
    if (n2 == 2) {
        T cs, sn;
        // s1, s2 are outputs of lahqr_schur22 that are not used here
        complex_type<T> s1, s2;
        // lahqr_schur22 receives the four entries of the block by reference
        // and returns the rotation (cs, sn) that is applied to the rest of
        // A (and to Q) below.
        lahqr_schur22(A(j0, j0), A(j0, j1), A(j1, j0), A(j1, j1), s1, s2, cs,
                      sn);
        // Apply transformation from the left
        // (rows j0 and j1, columns j2 .. n-1)
        if (j2 < n) {
            auto row1 = slice(A, j0, range{j2, n});
            auto row2 = slice(A, j1, range{j2, n});
            rot(row1, row2, cs, sn);
        }
        // Apply transformation from the right
        // (columns j0 and j1, rows 0 .. j0-1)
        if (j0 > 0) {
            auto col1 = slice(A, range{0, j0}, j0);
            auto col2 = slice(A, range{0, j0}, j1);
            rot(col1, col2, cs, sn);
        }
        // row1/row2 are the full columns j0 and j1 of Q
        if (want_q) {
            auto row1 = col(Q, j0);
            auto row2 = col(Q, j1);
            rot(row1, row2, cs, sn);
        }
    }
    // The block of order n1 now starts at j0 + n2.
    if (n1 == 2) {
        idx_t j0_2 = j0 + n2;
        idx_t j1_2 = j0_2 + 1;

        T cs, sn;
        // s1, s2 are outputs of lahqr_schur22 that are not used here
        complex_type<T> s1, s2;
        lahqr_schur22(A(j0_2, j0_2), A(j0_2, j1_2), A(j1_2, j0_2),
                      A(j1_2, j1_2), s1, s2, cs,
                      sn);
        // Apply transformation from the left
        // (rows j0_2 and j1_2, columns j0_2 + 2 .. n-1)
        if (j0_2 + 2 < n) {
            auto row1 = slice(A, j0_2, range{j0_2 + 2, n});
            auto row2 = slice(A, j1_2, range{j0_2 + 2, n});
            rot(row1, row2, cs, sn);
        }
        // Apply transformation from the right
        // (columns j0_2 and j1_2, rows 0 .. j0_2-1)
        if (j0_2 > 0) {
            auto col1 = slice(A, range{0, j0_2}, j0_2);
            auto col2 = slice(A, range{0, j0_2}, j1_2);
            rot(col1, col2, cs, sn);
        }
        // row1/row2 are the full columns j0_2 and j1_2 of Q
        if (want_q) {
            auto row1 = col(Q, j0_2);
            auto row2 = col(Q, j1_2);
            rot(row1, row2, cs, sn);
        }
    }

    return 0;
}


template <TLAPACK_CSMATRIX matrix_t,

          enable_if_t<is_complex<type_t<matrix_t>>, bool> = true>

int schur_swap(bool want_q,

               matrix_t& A,

               matrix_t& Q,

               const size_type<matrix_t>& j0,

               const size_type<matrix_t>& n1,

               const size_type<matrix_t>& n2)

{

    using idx_t = size_type<matrix_t>;

    using T = type_t<matrix_t>;

    using real_t = real_type<T>;

    using range = pair<idx_t, idx_t>;


    const idx_t n = ncols(A);


    tlapack_check(nrows(A) == n);

    tlapack_check(nrows(Q) == n);

    tlapack_check(ncols(Q) == n);

    tlapack_check(0 <= j0 and j0 < n);

    tlapack_check(n1 == 1);

    tlapack_check(n2 == 1);


    const idx_t j1 = j0 + 1;

    const idx_t j2 = j0 + 2;


    //

    // In the complex case, there can only be 1x1 blocks to swap

    //

    const T t00 = A(j0, j0);

    const T t11 = A(j1, j1);

    //

    // Determine the transformation to perform the interchange

    //

    real_t cs;

    T sn;

    T temp = A(j0, j1);

    T temp2 = t11 - t00;

    rotg(temp, temp2, cs, sn);


    A(j1, j1) = t00;

    A(j0, j0) = t11;


    // Apply transformation from the left

    if (j2 < n) {

        auto row1 = slice(A, j0, range{j2, n});

        auto row2 = slice(A, j1, range{j2, n});

        rot(row1, row2, cs, sn);

    }

    // Apply transformation from the right

    if (j0 > 0) {

        auto col1 = slice(A, range{0, j0}, j0);

        auto col2 = slice(A, range{0, j0}, j1);

        rot(col1, col2, cs, conj(sn));

    }

    if (want_q) {

        auto row1 = col(Q, j0);

        auto row2 = col(Q, j1);

        rot(row1, row2, cs, conj(sn));

    }


    return 0;

}


}  // namespace tlapack


#endif  // TLAPACK_SCHUR_SWAP_HH

utils.hpp

rot.hpp

rotg.hpp

swap.hpp

TLAPACK_CSMATRIX
#define TLAPACK_CSMATRIX
Macro for tlapack::concepts::ConstructableAndSliceableMatrix compatible with C++17.
Definition concepts.hpp:961

tlapack::lange
auto lange(norm_t normType, const matrix_t &A)
Calculates the norm of a matrix.
Definition lange.hpp:38

tlapack::schur_swap
int schur_swap(bool want_q, matrix_t &A, matrix_t &Q, const size_type< matrix_t > &j0, const size_type< matrix_t > &n1, const size_type< matrix_t > &n2)
schur_swap swaps 2 eigenvalues of A.
Definition schur_swap.hpp:49

tlapack::larfg
void larfg(storage_t storeMode, type_t< vector_t > &alpha, vector_t &x, type_t< vector_t > &tau)
Generates a elementary Householder reflection.
Definition larfg.hpp:73

tlapack::lahqr_schur22
void lahqr_schur22(T &a, T &b, T &c, T &d, complex_type< T > &s1, complex_type< T > &s2, T &cs, T &sn)
Computes the Schur factorization of a 2x2 matrix A.
Definition lahqr_schur22.hpp:44

tlapack::lacpy
void lacpy(uplo_t uplo, const matrixA_t &A, matrixB_t &B)
Copies a matrix from A to B.
Definition lacpy.hpp:38

tlapack::lasy2
int lasy2(Op trans_l, Op trans_r, int isign, const matrixT_t &TL, const matrixT_t &TR, const matrixB_t &B, type_t< matrixX_t > &scale, matrixX_t &X, type_t< matrixX_t > &xnorm)
lasy2 solves the Sylvester matrix equation where the matrices are of order 1 or 2.
Definition lasy2.hpp:42

tlapack::rotg
void rotg(T &a, T &b, T &c, T &s)
Construct plane rotation that eliminates b, such that:
Definition rotg.hpp:39

tlapack::rot
void rot(vectorX_t &x, vectorY_t &y, const c_type &c, const s_type &s)
Apply plane rotation:
Definition rot.hpp:44

tlapack_check
#define tlapack_check(cond)
Throw an error if cond is false.
Definition exceptionHandling.hpp:98

lahqr_schur22.hpp

lange.hpp

larfg.hpp

lasy2.hpp

tlapack
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D' ).
Definition arrayTraits.hpp:15

tlapack::real_type
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113

tlapack::MAX_NORM
constexpr internal::MaxNorm MAX_NORM
max norm
Definition types.hpp:339

tlapack::conj
constexpr T conj(const T &x) noexcept
Extends std::conj() to real datatypes.
Definition utils.hpp:100

tlapack::FORWARD
constexpr internal::Forward FORWARD
Forward direction.
Definition types.hpp:381

tlapack::GENERAL
constexpr internal::GeneralAccess GENERAL
General access.
Definition types.hpp:180

tlapack::COLUMNWISE_STORAGE
constexpr internal::ColumnwiseStorage COLUMNWISE_STORAGE
Columnwise storage.
Definition types.hpp:414

tlapack::NO_TRANS
constexpr internal::NoTranspose NO_TRANS
no transpose
Definition types.hpp:260