<T>LAPACK 0.1.1
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
potrf_blocked.hpp
Go to the documentation of this file.
1
4//
5// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
6//
7// This file is part of <T>LAPACK.
8// <T>LAPACK is free software: you can redistribute it and/or modify it under
9// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
10
11#ifndef TLAPACK_POTRF_BLOCKED_HH
12#define TLAPACK_POTRF_BLOCKED_HH
13
15#include "tlapack/blas/gemm.hpp"
16#include "tlapack/blas/herk.hpp"
17#include "tlapack/blas/trsm.hpp"
19
20namespace tlapack {
21
22struct BlockedCholeskyOpts : public EcOpts {
23 constexpr BlockedCholeskyOpts(const EcOpts& opts = {}) : EcOpts(opts){};
24
25 size_t nb = 32;
26};
27
65template <TLAPACK_UPLO uplo_t, TLAPACK_SMATRIX matrix_t>
67{
68 using T = type_t<matrix_t>;
69 using real_t = real_type<T>;
70 using idx_t = size_type<matrix_t>;
72
73 // Constants
74 const real_t one(1);
75 const idx_t n = nrows(A);
76 const idx_t nb = opts.nb;
77
78 // check arguments
79 tlapack_check(uplo == Uplo::Lower || uplo == Uplo::Upper);
80 tlapack_check(nrows(A) == ncols(A));
81
82 // Quick return
83 if (n <= 0) return 0;
84
85 // Unblocked code
86 else if (nb >= n)
87 return potf2(uplo, A);
88
89 // Blocked code
90 else {
91 if (uplo == Uplo::Upper) {
92 for (idx_t j = 0; j < n; j += nb) {
93 idx_t jb = min(nb, n - j);
94
95 // Define AJJ and A1J
96 auto AJJ = slice(A, range{j, j + jb}, range{j, j + jb});
97 auto A1J = slice(A, range{0, j}, range{j, j + jb});
98
100
102 if (info != 0) {
104 info + j,
105 "The leading minor of the reported order is not "
106 "positive definite,"
107 " and the factorization could not be completed.");
108 return info + j;
109 }
110
111 if (j + jb < n) {
112 // Define B and C
113 auto B = slice(A, range{0, j}, range{j + jb, n});
114 auto C = slice(A, range{j, j + jb}, range{j + jb, n});
115
116 // Compute the current block row
119 one, AJJ, C);
120 }
121 }
122 }
123 else {
124 for (idx_t j = 0; j < n; j += nb) {
125 idx_t jb = min(nb, n - j);
126
127 // Define AJJ and AJ1
128 auto AJJ = slice(A, range{j, j + jb}, range{j, j + jb});
129 auto AJ1 = slice(A, range{j, j + jb}, range{0, j});
130
132
134 if (info != 0) {
136 info + j,
137 "The leading minor of the reported order is not "
138 "positive definite,"
139 " and the factorization could not be completed.");
140 return info + j;
141 }
142
143 if (j + jb < n) {
144 // Define B and C
145 auto B = slice(A, range{j + jb, n}, range{0, j});
146 auto C = slice(A, range{j + jb, n}, range{j, j + jb});
147
148 // Compute the current block row
151 one, AJJ, C);
152 }
153 }
154 }
155
156 return 0;
157 }
158}
159
160template <TLAPACK_UPLO uplo_t,
161 TLAPACK_SMATRIX matrix_t,
162 disable_if_allow_optblas_t<matrix_t> = 0>
163int potrf_blocked(uplo_t uplo, matrix_t& A)
164{
165 return potrf_blocked(uplo, A, {});
166}
167
168#ifdef TLAPACK_USE_LAPACKPP
169
170template <TLAPACK_UPLO uplo_t,
171 TLAPACK_LEGACY_MATRIX matrix_t,
172 enable_if_allow_optblas_t<matrix_t> = 0>
173int potrf_blocked(uplo_t uplo, matrix_t& A)
174{
175 // Legacy objects
176 auto A_ = legacy_matrix(A);
177
178 // Constants to forward
179 const auto& n = A_.n;
180
181 if constexpr (layout<matrix_t> == Layout::ColMajor) {
182 return ::lapack::potrf((::blas::Uplo)(Uplo)uplo, n, A_.ptr, A_.ldim);
183 }
184 else {
185 return ::lapack::potrf(
186 ((uplo == Uplo::Lower) ? ::blas::Uplo::Upper : ::blas::Uplo::Lower),
187 n, A_.ptr, A_.ldim);
188 }
189}
190
191#endif
192
193} // namespace tlapack
194
195#endif // TLAPACK_POTRF_BLOCKED_HH
constexpr internal::LowerTriangle LOWER_TRIANGLE
Lower Triangle access.
Definition types.hpp:183
constexpr internal::UpperTriangle UPPER_TRIANGLE
Upper Triangle access.
Definition types.hpp:181
constexpr internal::RightSide RIGHT_SIDE
right side
Definition types.hpp:291
constexpr internal::NonUnitDiagonal NON_UNIT_DIAG
The main diagonal is not assumed to consist of 1's.
Definition types.hpp:215
constexpr internal::ConjTranspose CONJ_TRANS
conjugate transpose
Definition types.hpp:259
@ Lower
0 <= i <= m, 0 <= j <= i.
constexpr internal::NoTranspose NO_TRANS
no transpose
Definition types.hpp:255
constexpr internal::LeftSide LEFT_SIDE
left side
Definition types.hpp:289
#define TLAPACK_UPLO
Macro for tlapack::concepts::Uplo compatible with C++17.
Definition concepts.hpp:942
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
#define TLAPACK_LEGACY_MATRIX
Macro for tlapack::concepts::LegacyMatrix compatible with C++17.
Definition concepts.hpp:951
void herk(Uplo uplo, Op trans, const alpha_t &alpha, const matrixA_t &A, const beta_t &beta, matrixC_t &C)
Hermitian rank-k update:
Definition herk.hpp:68
void gemm(Op transA, Op transB, const alpha_t &alpha, const matrixA_t &A, const matrixB_t &B, const beta_t &beta, matrixC_t &C)
General matrix-matrix multiply:
Definition gemm.hpp:61
void trsm(Side side, Uplo uplo, Op trans, Diag diag, const alpha_t &alpha, const matrixA_t &A, matrixB_t &B)
Solve the triangular matrix-vector equation.
Definition trsm.hpp:76
int potrf_blocked(uplo_t uplo, matrix_t &A, const BlockedCholeskyOpts &opts)
Computes the Cholesky factorization of a Hermitian positive definite matrix A using a blocked algorit...
Definition potrf_blocked.hpp:66
int potf2(uplo_t uplo, matrix_t &A)
Computes the Cholesky factorization of a Hermitian positive definite matrix A using a level-2 algorit...
Definition potf2.hpp:58
#define tlapack_check(cond)
Throw an error if cond is false.
Definition exceptionHandling.hpp:98
#define tlapack_error(info, detailedInfo)
Error handler.
Definition exceptionHandling.hpp:142
Concept for types that represent tlapack::Uplo.
Computes the Cholesky factorization of a Hermitian positive definite matrix A using a level-2 algorit...
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
Definition potrf_blocked.hpp:22
size_t nb
Block size.
Definition potrf_blocked.hpp:25
Options for error checking.
Definition exceptionHandling.hpp:76