10#ifndef TLAPACK_ROT_SEQUENCE3_HH
11#define TLAPACK_ROT_SEQUENCE3_HH
99 const idx_t m = nrows(
A);
100 const idx_t n = ncols(
A);
101 const idx_t
k = (
side == Side::Left) ? m - 1 : n - 1;
102 const idx_t
l = ncols(
C);
105 const idx_t nb = 256;
114 if (
k < 1
or l < 1)
return;
118 if (
l == 1
or k <
l) {
119 for (idx_t j = 0; j <
l; ++j) {
129 if (
side == Side::Left) {
130 if (direction == Direction::Forward) {
132#pragma omp parallel for
133 for (idx_t
ib = 0;
ib < n;
ib += nb) {
134 idx_t
ib2 = std::min(
ib + nb, n);
136 for (idx_t
i1 = 0;
i1 < n; ++
i1) {
137 for (idx_t j = 0; j <
l - 1; ++j) {
138 for (idx_t i = 0,
g2 = j; i < j + 1; ++i, --
g2) {
139 idx_t
g = m - 2 -
g2;
150 for (idx_t j =
l - 1; j + 1 < m - 1; j += 2) {
151 for (idx_t i = 0,
g2 = j; i + 1 <
l;
153 idx_t
g = m - 2 -
g2;
181 S(
g + 1, i + 1) *
A(
g + 2,
i1);
183 C(
g + 1, i + 1) *
A(
g + 2,
i1);
196 idx_t
g2 = j - (
l - 1);
197 idx_t
g = m - 2 -
g2;
217 for (idx_t j = ((m -
l + 1) % 2); j <
l; ++j) {
218 for (idx_t i = j,
g2 = m - 2; i <
l; ++i, --
g2) {
219 idx_t
g = m - 2 -
g2;
232#pragma omp parallel for
233 for (idx_t
ib = 0;
ib < n;
ib += nb) {
234 idx_t
ib2 = std::min(
ib + nb, n);
237 for (idx_t j = 0; j <
l - 1; ++j) {
238 for (idx_t i = 0,
g = j; i < j + 1; ++i, --
g) {
249 for (idx_t j =
l - 1; j + 1 < m - 1; j += 2) {
250 for (idx_t i = 0,
g = j; i + 1 <
l;
269 S(
g + 1, i) *
A(
g + 2,
i1);
271 C(
g + 1, i) *
A(
g + 2,
i1);
281 A(
g - 1,
i1) =
C(
g - 1, i + 1) *
A(
g - 1,
i1) +
295 idx_t
g = j - (
l - 1);
306 S(
g + 1, i) *
A(
g + 2,
i1);
309 C(
g + 1, i) *
A(
g + 2,
i1);
316 for (idx_t j = ((m -
l + 1) % 2); j <
l; ++j) {
317 for (idx_t i = j,
g = m - 2; i <
l; ++i, --
g) {
330 if (direction == Direction::Forward) {
332#pragma omp parallel for
333 for (idx_t
ib = 0;
ib < m;
ib += nb) {
334 idx_t
ib2 = std::min(
ib + nb, m);
336 for (idx_t j = 0; j <
l - 1; ++j) {
337 for (idx_t i = 0,
g2 = j; i < j + 1; ++i, --
g2) {
338 idx_t
g = n - 2 -
g2;
349 for (idx_t j =
l - 1; j + 1 < n - 1; j += 2) {
350 for (idx_t i = 0,
g2 = j; i + 1 <
l; i += 2,
g2 -= 2) {
351 idx_t
g = n - 2 -
g2;
381 C(
g + 1, i + 1) *
A(
i1,
g + 2);
396 idx_t
g2 = j - (
l - 1);
397 idx_t
g = n - 2 -
g2;
417 for (idx_t j = ((n -
l + 1) % 2); j <
l; ++j) {
418 for (idx_t i = j,
g2 = n - 2; i <
l; ++i, --
g2) {
419 idx_t
g = n - 2 -
g2;
433#pragma omp parallel for
434 for (idx_t
ib = 0;
ib < m;
ib += nb) {
435 idx_t
ib2 = std::min(
ib + nb, m);
437 for (idx_t j = 0; j <
l - 1; ++j) {
438 for (idx_t i = 0,
g = j; i < j + 1; ++i, --
g) {
449 for (idx_t j =
l - 1; j + 1 < n - 1; j += 2) {
450 for (idx_t i = 0,
g = j; i + 1 <
l; i += 2,
g -= 2) {
471 C(
g + 1, i) *
A(
i1,
g + 2);
480 A(
i1,
g - 1) =
C(
g - 1, i + 1) *
A(
i1,
g - 1) +
493 idx_t
g = j - (
l - 1);
507 C(
g + 1, i) *
A(
i1,
g + 2);
513 for (idx_t j = ((n -
l + 1) % 2); j <
l; ++j) {
514 for (idx_t i = j,
g = n - 2; i <
l; ++i, --
g) {
529 if (
side == Side::Left) {
530 if (direction == Direction::Forward) {
532#pragma omp parallel for
533 for (idx_t
ib = 0;
ib < n;
ib += nb) {
534 idx_t
ib2 = std::min(
ib + nb, n);
536 for (idx_t j = 0; j <
l - 1; ++j) {
537 for (idx_t i = 0,
g2 = j; i < j + 1; ++i, --
g2) {
538 idx_t
g = m - 2 -
g2;
539 for (idx_t
i1 = 0;
i1 < n; ++
i1) {
549 for (idx_t j =
l - 1; j + 1 < m - 1; j += 2) {
550 for (idx_t i = 0,
g2 = j; i + 1 <
l; i += 2,
g2 -= 2) {
551 idx_t
g = m - 2 -
g2;
580 S(
g + 1, i + 1) *
A(
g + 2,
i1);
582 C(
g + 1, i + 1) *
A(
g + 2,
i1);
596 idx_t
g2 = j - (
l - 1);
597 idx_t
g = m - 2 -
g2;
617 for (idx_t j = ((m -
l + 1) % 2); j <
l; ++j) {
618 for (idx_t i = j,
g2 = m - 2; i <
l; ++i, --
g2) {
619 idx_t
g = m - 2 -
g2;
633#pragma omp parallel for
634 for (idx_t
ib = 0;
ib < n;
ib += nb) {
635 idx_t
ib2 = std::min(
ib + nb, n);
637 for (idx_t j = 0; j <
l - 1; ++j) {
638 for (idx_t i = 0,
g = j; i < j + 1; ++i, --
g) {
649 for (idx_t j =
l - 1; j + 1 < m - 1; j += 2) {
650 for (idx_t i = 0,
g = j; i + 1 <
l; i += 2,
g -= 2) {
669 S(
g + 1, i) *
A(
g + 2,
i1);
671 C(
g + 1, i) *
A(
g + 2,
i1);
681 A(
g - 1,
i1) =
C(
g - 1, i + 1) *
A(
g - 1,
i1) +
696 idx_t
g = j - (
l - 1);
708 S(
g + 1, i) *
A(
g + 2,
i1);
711 C(
g + 1, i) *
A(
g + 2,
i1);
717 for (idx_t j = ((m -
l + 1) % 2); j <
l; ++j) {
718 for (idx_t i = j,
g = m - 2; i <
l; ++i, --
g) {
732 if (direction == Direction::Forward) {
734#pragma omp parallel for
735 for (idx_t
ib = 0;
ib < m;
ib += nb) {
736 idx_t
ib2 = std::min(
ib + nb, m);
739 for (idx_t j = 0; j <
l - 1; ++j) {
740 for (idx_t i = 0,
g2 = j; i < j + 1; ++i, --
g2) {
741 idx_t
g = n - 2 -
g2;
752 for (idx_t j =
l - 1; j + 1 < n - 1; j += 2) {
753 for (idx_t i = 0,
g2 = j; i + 1 <
l;
755 idx_t
g = n - 2 -
g2;
784 C(
g + 1, i + 1) *
A(
i1,
g + 2);
799 idx_t
g2 = j - (
l - 1);
800 idx_t
g = n - 2 -
g2;
820 for (idx_t j = ((n -
l + 1) % 2); j <
l; ++j) {
821 for (idx_t i = j,
g2 = n - 2; i <
l; ++i, --
g2) {
822 idx_t
g = n - 2 -
g2;
835#pragma omp parallel for
836 for (idx_t
ib = 0;
ib < m;
ib += nb) {
837 idx_t
ib2 = std::min(
ib + nb, m);
840 for (idx_t j = 0; j <
l - 1; ++j) {
841 for (idx_t i = 0,
g = j; i < j + 1; ++i, --
g) {
852 for (idx_t j =
l - 1; j + 1 < n - 1; j += 2) {
853 for (idx_t i = 0,
g = j; i + 1 <
l;
874 C(
g + 1, i) *
A(
i1,
g + 2);
883 A(
i1,
g - 1) =
C(
g - 1, i + 1) *
A(
i1,
g - 1) +
896 idx_t
g = j - (
l - 1);
909 C(
g + 1, i) *
A(
i1,
g + 2);
916 for (idx_t j = ((n -
l + 1) % 2); j <
l; ++j) {
917 for (idx_t i = j,
g = n - 2; i <
l; ++i, --
g) {
constexpr T conj(const T &x) noexcept
Extends std::conj() to real datatypes.
Definition utils.hpp:100
#define TLAPACK_SIDE
Macro for tlapack::concepts::Side compatible with C++17.
Definition concepts.hpp:927
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
#define TLAPACK_DIRECTION
Macro for tlapack::concepts::Direction compatible with C++17.
Definition concepts.hpp:930
int rot_sequence(side_t side, direction_t direction, const C_t &c, const S_t &s, A_t &A)
Applies a sequence of plane rotations to an (m-by-n) matrix.
Definition rot_sequence.hpp:81
void rot_sequence3(side_t side, direction_t direction, const C_t &C, const S_t &S, A_t &A)
Applies a sequence of plane rotations to an (m-by-n) matrix.
Definition rot_sequence3.hpp:92
#define tlapack_check(cond)
Throw an error if cond is false.
Definition exceptionHandling.hpp:98
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113