tlapack/lahr2_8hpp_source.html

//

// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.

//

// This file is part of <T>LAPACK.

// <T>LAPACK is free software: you can redistribute it and/or modify it under

// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.


#ifndef TLAPACK_LAHR2_HH

#define TLAPACK_LAHR2_HH


#include "tlapack/base/utils.hpp"

#include "tlapack/blas/axpy.hpp"

#include "tlapack/blas/copy.hpp"

#include "tlapack/blas/gemm.hpp"

#include "tlapack/blas/gemv.hpp"

#include "tlapack/blas/scal.hpp"

#include "tlapack/blas/trmm.hpp"

#include "tlapack/blas/trmv.hpp"

#include "tlapack/lapack/lacpy.hpp"

#include "tlapack/lapack/larfg.hpp"


namespace tlapack {


/** Panel step of the blocked reduction to upper Hessenberg form.
 *
 * For i = 0, ..., nb-1 this routine
 *  - brings column i of A up to date with the i reflectors generated so far,
 *  - generates a new elementary reflector from A(k+i+1:n, i) with larfg(),
 *    storing its scalar factor in tau[i] and its vector in place in A,
 *  - fills column i of the upper triangular factor T and column i of Y.
 *
 * With V denoting the reflector vectors stored below the k-th subdiagonal of
 * the first nb columns of A (unit lower trapezoidal), the code accumulates
 * Y = A * V * T, first for rows k+1:n inside the loop and finally for rows
 * 0:k+1 with level 3 BLAS. This mirrors LAPACK's xLAHR2; the trailing
 * columns of A are NOT updated here, that is left to the caller.
 *
 * @param[in] k
 *      Row offset: reflector i acts on rows k+i+1, ..., n-1 of A.
 * @param[in] nb
 *      Number of columns to reduce. If nb < 1 the routine returns
 *      immediately without touching any argument.
 * @param[in,out] A
 *      Matrix with n = nrows(A) rows; columns 0, ..., n-k-1 are referenced.
 *      On exit the first nb columns hold the reduced entries on and above
 *      the k-th subdiagonal and the reflector vectors below it.
 * @param[out] tau
 *      Vector; entries 0, ..., nb-1 receive the reflector scalar factors.
 * @param[out] T
 *      Upper triangular factor. It is passed as a whole to trmm() against
 *      an nb-column matrix, so it is expected to be nb-by-nb. Its last
 *      column doubles as workspace while the loop is running.
 * @param[out] Y
 *      Matrix of which rows 0, ..., n-1 and columns 0, ..., nb-1 are written.
 *
 * @return 0 always.
 */
template <TLAPACK_SMATRIX matrix_t,
          TLAPACK_VECTOR vector_t,
          TLAPACK_SMATRIX matrixT_t,
          TLAPACK_SMATRIX matrixY_t>
int lahr2(size_type<matrix_t> k,
          size_type<matrix_t> nb,
          matrix_t& A,
          vector_t& tau,
          matrixT_t& T,
          matrixY_t& Y)
{
    using TA = type_t<matrix_t>;
    using idx_t = size_type<matrix_t>;
    using range = pair<idx_t, idx_t>;
    using real_t = real_type<TA>;

    // constants
    const real_t one(1);
    const idx_t n = nrows(A);

    // Quick return if possible.
    // nb < 1 must leave before the post-loop restore A(k + nb, nb - 1) = ei:
    // with no column processed, nb - 1 would wrap around (unsigned idx_t) or
    // become -1 (signed idx_t) and that write would land out of bounds.
    if (n <= 1 || nb < 1) return 0;

    // Sub-diagonal entry overwritten by the explicit 1 of the current
    // reflector; it is written back once the reflector has been used.
    TA ei(0);
    for (idx_t i = 0; i < nb; ++i) {
        if (i > 0) {
            //
            // Update A(K+1:N,I), the rest will be updated later via
            // level 3 BLAS.
            //

            //
            // Update I-th column of A - Y * V**H
            // (Application of the reflectors from the right)
            //
            auto Y2 = slice(Y, range{k + 1, n}, range{0, i});
            auto Vti = slice(A, k + i, range{0, i});
            auto b = slice(A, range{k + 1, n}, i);
            // gemv has no "conjugate only" mode: conjugate the row of V in
            // place, use it, then conjugate again to restore it.
            for (idx_t j = 0; j < i; ++j)
                Vti[j] = conj(Vti[j]);
            gemv(NO_TRANS, -one, Y2, Vti, one, b);
            for (idx_t j = 0; j < i; ++j)
                Vti[j] = conj(Vti[j]);
            //
            // Apply I - V * T**H * V**H to this column (call it b) from the
            // left, using the last column of T as workspace
            //
            // Let  V = ( V1 )   and   b = ( b1 )   (first i rows)
            //          ( V2 )             ( b2 )
            //
            // where V1 is unit lower triangular
            //
            auto b1 = slice(b, range{0, i});
            auto b2 = slice(b, range{i, size(b)});
            auto V = slice(A, range{k + 1, n}, range{0, i});
            auto V1 = slice(V, range{0, i}, range{0, i});
            auto V2 = slice(V, range{i, nrows(V)}, range{0, i});
            //
            // w := V1**H * b1
            //
            auto w = slice(T, range{0, i}, nb - 1);
            copy(b1, w);
            trmv(LOWER_TRIANGLE, CONJ_TRANS, UNIT_DIAG, V1, w);
            //
            // w := w + V2**H * b2
            //
            gemv(CONJ_TRANS, one, V2, b2, one, w);
            //
            // w := T**H * w
            //
            auto T2 = slice(T, range{0, i}, range{0, i});
            trmv(UPPER_TRIANGLE, CONJ_TRANS, NON_UNIT_DIAG, T2, w);
            //
            // b2 := b2 - V2*w
            //
            gemv(NO_TRANS, -one, V2, w, one, b2);
            //
            // b1 := b1 - V1*w
            //
            trmv(LOWER_TRIANGLE, NO_TRANS, UNIT_DIAG, V1, w);
            axpy(-one, w, b1);

            // Restore the entry that the previous iteration replaced by 1.
            A(k + i, i - 1) = ei;
        }
        //
        // Generate the elementary reflector H(I) to annihilate
        // A(K+I+2:N,I)
        //
        auto v = slice(A, range{k + i + 1, n}, i);
        larfg(FORWARD, COLUMNWISE_STORAGE, v, tau[i]);

        // The products below use v as a full reflector vector, so its
        // leading entry is set to one explicitly. The value larfg left there
        // is kept in ei and written back to A later.
        ei = v[0];
        v[0] = one;
        //
        // Compute  Y(K+1:N,I)
        //
        auto A2 = slice(A, range{k + 1, n}, range{i + 1, n - k});
        auto y = slice(Y, range{k + 1, n}, i);
        gemv(NO_TRANS, one, A2, v, y);
        auto t = slice(T, range{0, i}, i);
        auto A3 = slice(A, range{k + i + 1, n}, range{0, i});
        gemv(CONJ_TRANS, one, A3, v, t);
        auto Y2 = slice(Y, range{k + 1, n}, range{0, i});
        gemv(NO_TRANS, -one, Y2, t, one, y);
        scal(tau[i], y);
        //
        // Compute T(0:I+1,I)
        //
        scal(-tau[i], t);
        auto T2 = slice(T, range{0, i}, range{0, i});
        trmv(UPPER_TRIANGLE, NO_TRANS, NON_UNIT_DIAG, T2, t);
        T(i, i) = tau[i];
    }
    // Restore the entry replaced by 1 in the last iteration (nb >= 1 here).
    A(k + nb, nb - 1) = ei;
    //
    // Compute Y(0:k+1,0:nb)
    //
    auto A4 = slice(A, range{0, k + 1}, range{1, nb + 1});
    auto Y3 = slice(Y, range{0, k + 1}, range{0, nb});
    lacpy(GENERAL, A4, Y3);
    auto V1 = slice(A, range{k + 1, k + nb + 1}, range{0, nb});
    auto Y1 = slice(Y, range{0, k + 1}, range{0, nb});
    trmm(RIGHT_SIDE, LOWER_TRIANGLE, NO_TRANS, UNIT_DIAG, one, V1, Y1);
    if (k + nb + 1 < n) {
        // Rows of V below the triangular part contribute through a plain
        // matrix-matrix product.
        auto A5 = slice(A, range{0, k + 1}, range{nb + 1, n - k});
        auto V2 = slice(A, range{k + nb + 1, n}, range{0, nb});
        gemm(NO_TRANS, NO_TRANS, one, A5, V2, one, Y1);
    }
    trmm(RIGHT_SIDE, UPPER_TRIANGLE, NO_TRANS, NON_UNIT_DIAG, one, T, Y1);

    return 0;
}


}  // namespace tlapack


#endif  // TLAPACK_LAHR2_HH

tlapack::LOWER_TRIANGLE
constexpr internal::LowerTriangle LOWER_TRIANGLE
Lower Triangle access.
Definition types.hpp:183

tlapack::UPPER_TRIANGLE
constexpr internal::UpperTriangle UPPER_TRIANGLE
Upper Triangle access.
Definition types.hpp:181

tlapack::RIGHT_SIDE
constexpr internal::RightSide RIGHT_SIDE
right side
Definition types.hpp:291

tlapack::FORWARD
constexpr internal::Forward FORWARD
Forward direction.
Definition types.hpp:376

tlapack::UNIT_DIAG
constexpr internal::UnitDiagonal UNIT_DIAG
The main diagonal is assumed to consist of 1's.
Definition types.hpp:217

tlapack::GENERAL
constexpr internal::GeneralAccess GENERAL
General access.
Definition types.hpp:175

tlapack::NON_UNIT_DIAG
constexpr internal::NonUnitDiagonal NON_UNIT_DIAG
The main diagonal is not assumed to consist of 1's.
Definition types.hpp:215

tlapack::CONJ_TRANS
constexpr internal::ConjTranspose CONJ_TRANS
conjugate transpose
Definition types.hpp:259

tlapack::COLUMNWISE_STORAGE
constexpr internal::ColumnwiseStorage COLUMNWISE_STORAGE
Columnwise storage.
Definition types.hpp:409

tlapack::NO_TRANS
constexpr internal::NoTranspose NO_TRANS
no transpose
Definition types.hpp:255

utils.hpp

tlapack::conj
constexpr T conj(const T &x) noexcept
Extends std::conj() to real datatypes.
Definition utils.hpp:100

axpy.hpp

copy.hpp

gemm.hpp

gemv.hpp

scal.hpp

trmm.hpp

trmv.hpp

TLAPACK_SMATRIX
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899

TLAPACK_VECTOR
#define TLAPACK_VECTOR
Macro for tlapack::concepts::Vector compatible with C++17.
Definition concepts.hpp:906

tlapack::lahr2
int lahr2(size_type< matrix_t > k, size_type< matrix_t > nb, matrix_t &A, vector_t &tau, matrixT_t &T, matrixY_t &Y)
Reduces a general square matrix to upper Hessenberg form.
Definition lahr2.hpp:61

tlapack::larfg
void larfg(storage_t storeMode, type_t< vector_t > &alpha, vector_t &x, type_t< vector_t > &tau)
Generates an elementary Householder reflection.
Definition larfg.hpp:73

tlapack::lacpy
void lacpy(uplo_t uplo, const matrixA_t &A, matrixB_t &B)
Copies a matrix from A to B.
Definition lacpy.hpp:38

tlapack::copy
void copy(const vectorX_t &x, vectorY_t &y)
Copy vector, $y := x$.
Definition copy.hpp:31

tlapack::axpy
void axpy(const alpha_t &alpha, const vectorX_t &x, vectorY_t &y)
Add scaled vector, $y := \alpha x + y$.
Definition axpy.hpp:34

tlapack::scal
void scal(const alpha_t &alpha, vector_t &x)
Scale vector by constant, $x := \alpha x$.
Definition scal.hpp:30

tlapack::gemv
void gemv(Op trans, const alpha_t &alpha, const matrixA_t &A, const vectorX_t &x, const beta_t &beta, vectorY_t &y)
General matrix-vector multiply: $y := \alpha\,op(A)\,x + \beta y$.
Definition gemv.hpp:57

tlapack::trmv
void trmv(Uplo uplo, Op trans, Diag diag, const matrixA_t &A, vectorX_t &x)
Triangular matrix-vector multiply: $x := op(A)\,x$.
Definition trmv.hpp:60

tlapack::trmm
void trmm(Side side, Uplo uplo, Op trans, Diag diag, const alpha_t &alpha, const matrixA_t &A, matrixB_t &B)
Triangular matrix-matrix multiply: $B := \alpha\,op(A)\,B$ or $B := \alpha\,B\,op(A)$.
Definition trmm.hpp:72

tlapack::gemm
void gemm(Op transA, Op transB, const alpha_t &alpha, const matrixA_t &A, const matrixB_t &B, const beta_t &beta, matrixC_t &C)
General matrix-matrix multiply: $C := \alpha\,op(A)\,op(B) + \beta C$.
Definition gemm.hpp:61

lacpy.hpp

larfg.hpp

tlapack::real_type
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113