tlapack/gehrd_8hpp_source.html

//

// Copyright (c) 2025, University of Colorado Denver. All rights reserved.

//

// This file is part of <T>LAPACK.

// <T>LAPACK is free software: you can redistribute it and/or modify it under

// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.


#ifndef TLAPACK_GEHRD_HH

#define TLAPACK_GEHRD_HH


#include "tlapack/base/utils.hpp"

#include "tlapack/blas/gemm.hpp"

#include "tlapack/lapack/gehd2.hpp"

#include "tlapack/lapack/lahr2.hpp"

#include "tlapack/lapack/larfb.hpp"

#include "tlapack/lapack/laset.hpp"


namespace tlapack {


/**
 * Options struct for gehrd.
 */
struct GehrdOpts {
    /// Block size used in the blocked reduction.
    size_t nb{32};

    /// If only nx_switch columns are left, the algorithm will use unblocked
    /// code.
    size_t nx_switch{128};
};


/** Workspace query of gehrd().
 *
 * @tparam T Element type of the workspace being asked about. A non-trivial
 *      answer is only produced when T equals the element type of
 *      matrix_type<matrix_t, vector_t>; otherwise a default-constructed
 *      WorkInfo is returned.
 *
 * @param[in] ilo  First index of the range to be reduced.
 * @param[in] ihi  End index of the range to be reduced.
 * @param[in] A    Matrix to be reduced; only its dimensions are used here and
 *                 in the forwarded workspace queries.
 * @param[in] tau  Vector of reflector scalars, forwarded to gehd2_worksize.
 * @param[in] opts Block size and crossover point, see GehrdOpts.
 *
 * @return WorkInfo describing the workspace requested by gehrd_work().
 */
template <class T, TLAPACK_SMATRIX matrix_t, TLAPACK_SVECTOR vector_t>
constexpr WorkInfo gehrd_worksize(size_type<matrix_t> ilo,
                                  size_type<matrix_t> ihi,
                                  const matrix_t& A,
                                  const vector_t& tau,
                                  const GehrdOpts& opts = {})
{
    using idx_t = size_type<matrix_t>;
    using work_t = matrix_type<matrix_t, vector_t>;
    using range = pair<idx_t, idx_t>;

    const idx_t n = ncols(A);

    // Effective block size; guarded so that ihi - ilo - 1 is only evaluated
    // when ilo < ihi.
    idx_t nb = 0;
    if (ilo < ihi) nb = min<idx_t>(opts.nb, ihi - ilo - 1);

    // Crossover point below which only unblocked code is used
    const idx_t nx = max<idx_t>(nb, opts.nx_switch);

    WorkInfo workinfo;
    if constexpr (is_same_v<T, type_t<work_t>>) {
        // Empty matrix: no workspace at all
        if (n <= 0) return workinfo;

        const bool uses_blocked_code = (ilo < ihi) && (nx < ihi - ilo - 1);
        if (uses_blocked_code) {
            // Room for Y (n-by-nb) stacked with T (nb-by-nb)
            workinfo = WorkInfo(n + nb, nb);

            const range reflector_rows{ilo + 1, ihi};
            const range leading_block{0, nb};

            auto&& V = slice(A, reflector_rows, range{ilo, ilo + nb});
            auto&& T_s = slice(A, leading_block, leading_block);
            auto&& A5 = slice(A, reflector_rows, range{ilo + nb, n});

            // The larfb request is merged in transposed form
            const WorkInfo larfb_info =
                larfb_worksize<T>(LEFT_SIDE, CONJ_TRANS, FORWARD,
                                  COLUMNWISE_STORAGE, V, T_s, A5);
            workinfo.minMax(larfb_info.transpose());
        }

        // The unblocked routine is merged in whenever n > 0
        workinfo.minMax(gehd2_worksize<T>(ilo, ihi, A, tau));
    }

    return workinfo;
}


/** Reduces a general square matrix to upper Hessenberg form.
 *  Workspace is provided as an argument.
 *
 * While more than max(nb, opts.nx_switch) columns remain, panels of nb
 * columns are reduced with lahr2 and the resulting block reflector is applied
 * to the rest of the matrix with gemm, trmm, axpy and larfb_work. The
 * remaining columns are reduced with the unblocked routine gehd2_work.
 *
 * @param[in] ilo  First index of the range to be reduced; checked to satisfy
 *                 0 <= ilo < n.
 * @param[in] ihi  End index of the range to be reduced; checked to satisfy
 *                 0 <= ihi <= n.
 * @param[in,out] A  n-by-n matrix, overwritten by the routines listed above.
 *                   NOTE(review): presumably holds the Hessenberg matrix and
 *                   the reflectors on exit, as in LAPACK xGEHRD - confirm.
 * @param[out] tau   Vector with at least n-1 entries, filled by lahr2 and
 *                   gehd2_work.
 * @param work       Workspace; see gehrd_worksize() for the size query.
 * @param[in] opts   Block size and crossover point, see GehrdOpts.
 *
 * @return 0 if n == 0, otherwise the value returned by gehd2_work.
 */
template <TLAPACK_SMATRIX matrix_t,
          TLAPACK_SVECTOR vector_t,
          TLAPACK_WORKSPACE work_t>
int gehrd_work(size_type<matrix_t> ilo,
               size_type<matrix_t> ihi,
               matrix_t& A,
               vector_t& tau,
               work_t& work,
               const GehrdOpts& opts = {})
{
    using idx_t = size_type<matrix_t>;
    using T = type_t<work_t>;
    using range = pair<idx_t, idx_t>;
    using TA = type_t<matrix_t>;
    using real_t = real_type<TA>;

    // constants
    const real_t one(1);
    const T zero(0);
    const idx_t n = ncols(A);

    // Blocksize
    const idx_t nb = (ilo < ihi) ? min<idx_t>(opts.nb, ihi - ilo - 1) : 0;
    // Size of the last block which will be handled with unblocked code
    const idx_t nx = max<idx_t>(nb, opts.nx_switch);

    // check arguments: A must be square whatever its size
    tlapack_check_false(ncols(A) != nrows(A));

    // quick return
    // This has to come before the remaining checks: for n == 0 the tests
    // (ilo >= n) and (size(tau) < n - 1, with unsigned idx_t) can never pass,
    // which would make the quick return unreachable.
    if (n <= 0) return 0;

    // check arguments that only make sense for n > 0
    tlapack_check_false((ilo < 0) or (ilo >= n));
    tlapack_check_false((ihi < 0) or (ihi > n));
    tlapack_check_false(static_cast<idx_t>(size(tau)) < n - 1);

    // The blocked code is used only if more than nx columns have to be
    // reduced. The (ilo < ihi) guard keeps ihi - ilo - 1 from wrapping around
    // when idx_t is unsigned.
    const bool use_blocked = (ilo < ihi) && (nx < ihi - ilo - 1);

    // Matrices Y and T
    auto [Y, work2] = use_blocked ? reshape(work, n, nb) : reshape(work, 0, 0);
    // Y is reused, through its transpose view, as the workspace of larfb_work
    auto Yt = transpose_view(Y);
    auto [matrixT, work3] =
        use_blocked ? reshape(work2, nb, nb) : reshape(work2, 0, 0);
    laset(GENERAL, zero, zero, Y);

    idx_t i = ilo;
    // Same test as use_blocked, evaluated for the current column i. Written
    // without "ihi - 1" so that ihi == 0 with an unsigned idx_t does not wrap
    // around and enter the loop with empty Y and T.
    for (; (i < ihi) && (nx < ihi - i - 1); i = i + nb) {
        const idx_t nb2 = min(nb, ihi - i - 1);

        // Reduce columns i:i+nb2 to Hessenberg form, returning the matrices
        // V, T and Y which are needed to update the unreduced part of A
        auto V = slice(A, range{i + 1, ihi}, range{i, i + nb2});
        auto A2 = slice(A, range{0, ihi}, range{i, ihi});
        auto tau2 = slice(tau, range{i, ihi});
        auto T_s = slice(matrixT, range{0, nb2}, range{0, nb2});
        auto Y_s = slice(Y, range{0, n}, range{0, nb2});
        lahr2(i, nb2, A2, tau2, T_s, Y_s);
        if (i + nb2 < ihi) {
            // Note, this V2 contains the last row of the triangular part
            auto V2 = slice(V, range{nb2 - 1, ihi - i - 1}, range{0, nb2});

            // Apply the block reflector H to A(0:ihi,i+nb2:ihi) from the
            // right, computing A := A - Y * V**H. The multiplication requires
            // V(nb2-1,nb2-1) to be set to 1.
            const TA ei = V(nb2 - 1, nb2 - 1);
            V(nb2 - 1, nb2 - 1) = one;
            auto A3 = slice(A, range{0, ihi}, range{i + nb2, ihi});
            auto Y_2 = slice(Y, range{0, ihi}, range{0, nb2});
            gemm(NO_TRANS, CONJ_TRANS, -one, Y_2, V2, one, A3);
            // Restore the entry that was temporarily overwritten
            V(nb2 - 1, nb2 - 1) = ei;
        }
        // Apply the block reflector H to A(0:i+1,i+1:i+nb2) from the right
        auto V1 = slice(A, range{i + 1, i + nb2 + 1}, range{i, i + nb2});
        trmm(RIGHT_SIDE, LOWER_TRIANGLE, CONJ_TRANS, UNIT_DIAG, one, V1, Y_s);
        for (idx_t j = 0; j < nb2 - 1; ++j) {
            auto A4 = slice(A, range{0, i + 1}, i + j + 1);
            axpy(-one, slice(Y, range{0, i + 1}, j), A4);
        }

        // Apply the block reflector H to A(i+1:ihi,i+nb2:n) from the left
        auto A5 = slice(A, range{i + 1, ihi}, range{i + nb2, n});
        larfb_work(LEFT_SIDE, CONJ_TRANS, FORWARD, COLUMNWISE_STORAGE, V, T_s,
                   A5, Yt);
    }

    // Use unblocked code to reduce the rest of the matrix
    return gehd2_work(i, ihi, A, tau, work);
}


/** Reduces a general square matrix to upper Hessenberg form.
 *
 * Convenience driver: queries the workspace size with gehrd_worksize(),
 * allocates it, and forwards all arguments to gehrd_work().
 *
 * @param[in] ilo  First index of the range to be reduced.
 * @param[in] ihi  End index of the range to be reduced.
 * @param[in,out] A  Square matrix to be reduced.
 * @param[out] tau   Vector of reflector scalars.
 * @param[in] opts   Block size and crossover point, see GehrdOpts.
 *
 * @return 0 if A has no columns, otherwise the value returned by
 *         gehrd_work().
 */
template <TLAPACK_SMATRIX matrix_t, TLAPACK_SVECTOR vector_t>
int gehrd(size_type<matrix_t> ilo,
          size_type<matrix_t> ihi,
          matrix_t& A,
          vector_t& tau,
          const GehrdOpts& opts = {})
{
    using idx_t = size_type<matrix_t>;
    using work_t = matrix_type<matrix_t, vector_t>;
    using T = type_t<work_t>;

    // Nothing to do for an empty matrix
    const idx_t n = ncols(A);
    if (n <= 0) {
        return 0;
    }

    // Ask how much workspace the reduction needs ...
    const WorkInfo workinfo = gehrd_worksize<T>(ilo, ihi, A, tau, opts);

    // ... and create it; work_ owns the storage, work is the matrix view
    std::vector<T> work_;
    Create<work_t> new_matrix;
    auto work = new_matrix(work_, workinfo.m, workinfo.n);

    return gehrd_work(ilo, ihi, A, tau, work, opts);
}


}  // namespace tlapack


#endif  // TLAPACK_GEHRD_HH

utils.hpp

gemm.hpp

TLAPACK_SVECTOR
#define TLAPACK_SVECTOR
Macro for tlapack::concepts::SliceableVector compatible with C++17.
Definition concepts.hpp:909

TLAPACK_SMATRIX
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899

TLAPACK_WORKSPACE
#define TLAPACK_WORKSPACE
Macro for tlapack::concepts::Workspace compatible with C++17.
Definition concepts.hpp:912

gehd2.hpp

tlapack::gehrd
int gehrd(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, const GehrdOpts &opts={})
Reduces a general square matrix to upper Hessenberg form.
Definition gehrd.hpp:222

tlapack::transpose
void transpose(matrixA_t &A, matrixB_t &B, const TransposeOpts &opts={})
transpose a matrix A into a matrix B.
Definition transpose.hpp:92

tlapack::laset
void laset(uplo_t uplo, const type_t< matrix_t > &alpha, const type_t< matrix_t > &beta, matrix_t &A)
Initializes a matrix to diagonal and off-diagonal values.
Definition laset.hpp:38

tlapack::larfb_work
int larfb_work(side_t side, trans_t trans, direction_t direction, storage_t storeMode, const matrixV_t &V, const matrixT_t &Tmatrix, matrixC_t &C, work_t &work)
Applies a block reflector $H$ or its conjugate transpose $H^H$ to an m-by-n matrix C, from either the left or ...
Definition larfb.hpp:111

tlapack::lahr2
int lahr2(size_type< matrix_t > k, size_type< matrix_t > nb, matrix_t &A, vector_t &tau, matrixT_t &T, matrixY_t &Y)
Reduces a general square matrix to upper Hessenberg form.
Definition lahr2.hpp:61

tlapack::axpy
void axpy(const alpha_t &alpha, const vectorX_t &x, vectorY_t &y)
Add scaled vector, $y := \alpha x + y$.
Definition axpy.hpp:34

tlapack::trmm
void trmm(Side side, Uplo uplo, Op trans, Diag diag, const alpha_t &alpha, const matrixA_t &A, matrixB_t &B)
Triangular matrix-matrix multiply: $B := \alpha \, op(A) \, B$ or $B := \alpha \, B \, op(A)$.
Definition trmm.hpp:72

tlapack::gemm
void gemm(Op transA, Op transB, const alpha_t &alpha, const matrixA_t &A, const matrixB_t &B, const beta_t &beta, matrixC_t &C)
General matrix-matrix multiply:
Definition gemm.hpp:61

tlapack::gehrd_work
int gehrd_work(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, work_t &work, const GehrdOpts &opts={})
Reduces a general square matrix to upper Hessenberg form.   Workspace is provided as an argument.
Definition gehrd.hpp:99

tlapack::gehd2_work
int gehd2_work(size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, vector_t &tau, work_t &work)
Reduces a general square matrix to upper Hessenberg form.   Workspace is provided as an argument.
Definition gehd2.hpp:76

tlapack_check_false
#define tlapack_check_false(cond)
Throw an error if cond is true.
Definition exceptionHandling.hpp:113

tlapack::gehrd_worksize
constexpr WorkInfo gehrd_worksize(size_type< matrix_t > ilo, size_type< matrix_t > ihi, const matrix_t &A, const vector_t &tau, const GehrdOpts &opts={})
Workspace query of gehrd()
Definition gehrd.hpp:52

lahr2.hpp

larfb.hpp
Applies a Householder block reflector to a matrix.

laset.hpp

tlapack
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D' ).
Definition arrayTraits.hpp:15

tlapack::real_type
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113

tlapack::LOWER_TRIANGLE
constexpr internal::LowerTriangle LOWER_TRIANGLE
Lower Triangle access.
Definition types.hpp:188

tlapack::RIGHT_SIDE
constexpr internal::RightSide RIGHT_SIDE
right side
Definition types.hpp:296

tlapack::FORWARD
constexpr internal::Forward FORWARD
Forward direction.
Definition types.hpp:381

tlapack::UNIT_DIAG
constexpr internal::UnitDiagonal UNIT_DIAG
The main diagonal is assumed to consist of 1's.
Definition types.hpp:222

tlapack::GENERAL
constexpr internal::GeneralAccess GENERAL
General access.
Definition types.hpp:180

tlapack::CONJ_TRANS
constexpr internal::ConjTranspose CONJ_TRANS
conjugate transpose
Definition types.hpp:264

tlapack::COLUMNWISE_STORAGE
constexpr internal::ColumnwiseStorage COLUMNWISE_STORAGE
Columnwise storage.
Definition types.hpp:414

tlapack::NO_TRANS
constexpr internal::NoTranspose NO_TRANS
no transpose
Definition types.hpp:260

tlapack::LEFT_SIDE
constexpr internal::LeftSide LEFT_SIDE
left side
Definition types.hpp:294

tlapack::GehrdOpts
Options struct for gehrd.
Definition gehrd.hpp:27

tlapack::GehrdOpts::nb
size_t nb
Block size used in the blocked reduction.
Definition gehrd.hpp:28

tlapack::GehrdOpts::nx_switch
size_t nx_switch
If only nx_switch columns are left, the algorithm will use unblocked code.
Definition gehrd.hpp:29

tlapack::WorkInfo
Output information in the workspace query.
Definition workspace.hpp:16