<T>LAPACK 0.1.1
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
multishift_qz_sweep.hpp
Go to the documentation of this file.
1
3//
4// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
5//
6// This file is part of <T>LAPACK.
7// <T>LAPACK is free software: you can redistribute it and/or modify it under
8// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
9
10#ifndef TLAPACK_QZ_SWEEP_HH
11#define TLAPACK_QZ_SWEEP_HH
12
14#include "tlapack/blas/gemm.hpp"
18
19namespace tlapack {
20
59template <TLAPACK_SMATRIX matrix_t,
60 TLAPACK_VECTOR alpha_t,
61 TLAPACK_VECTOR beta_t,
62 enable_if_t<is_complex<type_t<alpha_t>>, bool> = true>
64 bool want_q,
65 bool want_z,
68 matrix_t& A,
69 matrix_t& B,
70 const alpha_t& alpha,
71 const beta_t& beta,
72 matrix_t& Q,
73 matrix_t& Z)
74{
75 using TA = type_t<matrix_t>;
76 using real_t = real_type<TA>;
77 using idx_t = size_type<matrix_t>;
79
80 const real_t one(1);
81 const real_t zero(0);
82 const idx_t n = ncols(A);
83 const real_t eps = ulp<real_t>();
84 const real_t small_num = safe_min<real_t>() * ((real_t)n / eps);
85
86 // Define workspace matrices
87 // We use the lower triangular part of A and B as workspace
88 const idx_t n_block_max = (n - 3) / 3;
89 const idx_t n_shifts_max =
90 min(ihi - ilo - 1, std::max<idx_t>(2, 3 * (n_block_max / 4)));
91 idx_t n_shifts = std::min<idx_t>(size(alpha), n_shifts_max);
92 if (n_shifts % 2 == 1) n_shifts = n_shifts - 1;
93 idx_t n_bulges = n_shifts / 2;
94
95 const idx_t n_block_desired = std::min<idx_t>(2 * n_shifts, n_block_max);
96
97 // Qc stores the left orthogonal transformations
98 auto Qc =
99 slice(A, range{n - n_block_desired, n}, range{0, n_block_desired});
100 // Zc stores the left orthogonal transformations
101 auto Zc =
102 slice(B, range{n - n_block_desired, n}, range{0, n_block_desired});
103
104 // Workspace for the reflector (note, this overlaps with the multiplication
105 // workspace, they are not needed at the same time)
106 auto v = slice(A, range(n - 3, n), n_block_desired);
107
108 // Workspace for horizontal multiplications
109 auto WH = slice(A, range{n - n_block_desired, n},
111
112 // Workspace for vertical multiplications
113 auto WV = slice(A, range{n_block_desired + 3, n - n_block_desired},
115
116 // Position of the shift train in the pencil.
117 idx_t i_pos_block;
118
119 //
120 // The following code block introduces the bulges into the matrix
121 //
122 {
123 // Near-the-diagonal bulge introduction
124 // The calculations are initially limited to a small window.
125 // The rest is updated later via level 3 BLAS
126 idx_t n_block = min(n_block_desired, ihi - ilo);
127 auto A2 =
128 slice(A, range(ilo, ilo + n_block), range(ilo, ilo + n_block));
129 auto B2 =
130 slice(B, range(ilo, ilo + n_block), range(ilo, ilo + n_block));
131 auto Qc2 = slice(Qc, range{0, n_block}, range{0, n_block});
133 auto Zc2 = slice(Zc, range{0, n_block}, range{0, n_block});
135
136 for (idx_t i_bulge = 0; i_bulge < n_bulges; i_bulge++) {
137 TA t1;
138
139 {
140 auto H = slice(A2, range{0, 3}, range{0, 3});
141 auto T = slice(B2, range{0, 3}, range{0, 3});
143 alpha[2 * i_bulge + 1], beta[2 * i_bulge],
144 beta[2 * i_bulge + 1]);
146 }
147
148 // Apply reflector to the block
149 {
150 t1 = conj(t1);
151 const TA v2 = v[1];
152 const TA t2 = t1 * v2;
153 const TA v3 = v[2];
154 const TA t3 = t1 * v[2];
155 TA sum;
156
157 // Apply reflector from the left to A
158 for (idx_t j = 0; j < n_block; ++j) {
159 sum = A2(0, j) + conj(v2) * A2(1, j) + conj(v3) * A2(2, j);
160 A2(0, j) = A2(0, j) - sum * t1;
161 A2(1, j) = A2(1, j) - sum * t2;
162 A2(2, j) = A2(2, j) - sum * t3;
163 }
164 // Apply reflector from the left to B
165 for (idx_t j = 0; j < n_block; ++j) {
166 sum = B2(0, j) + conj(v2) * B2(1, j) + conj(v3) * B2(2, j);
167 B2(0, j) = B2(0, j) - sum * t1;
168 B2(1, j) = B2(1, j) - sum * t2;
169 B2(2, j) = B2(2, j) - sum * t3;
170 }
171 // Apply reflector to Qc from the right
172 for (idx_t j = 0; j < n_block; ++j) {
173 sum = Qc2(j, 0) + v2 * Qc2(j, 1) + v3 * Qc2(j, 2);
174 Qc2(j, 0) = Qc2(j, 0) - sum * conj(t1);
175 Qc2(j, 1) = Qc2(j, 1) - sum * conj(t2);
176 Qc2(j, 2) = Qc2(j, 2) - sum * conj(t3);
177 }
178 }
179
180 // Move the bulge down to make room for another bulge
181 for (idx_t i = 0; i < n_block - 3 - 2 * i_bulge; ++i) {
182 // Remove fill-in from B using an inverse reflector
183 {
184 auto T = slice(B2, range{i + 1, i + 3}, range{i, i + 3});
185 inv_house3(T, v, t1);
186 }
187
188 // Apply the reflector
189 {
190 t1 = conj(t1);
191 const TA v2 = v[1];
192 const TA t2 = t1 * v2;
193 const TA v3 = v[2];
194 const TA t3 = t1 * v[2];
195 TA sum;
196
197 // Apply reflector from the right to B
198 for (idx_t j = 0; j < i + 3; ++j) {
199 sum = B2(j, i) + v2 * B2(j, i + 1) + v3 * B2(j, i + 2);
200 B2(j, i) = B2(j, i) - sum * conj(t1);
201 B2(j, i + 1) = B2(j, i + 1) - sum * conj(t2);
202 B2(j, i + 2) = B2(j, i + 2) - sum * conj(t3);
203 }
204 B2(i + 1, i) = (TA)0;
205 B2(i + 2, i) = (TA)0;
206 // Apply reflector from the right to A
207 for (idx_t j = 0; j < min(i + 4, n_block); ++j) {
208 sum = A2(j, i) + v2 * A2(j, i + 1) + v3 * A2(j, i + 2);
209 A2(j, i) = A2(j, i) - sum * conj(t1);
210 A2(j, i + 1) = A2(j, i + 1) - sum * conj(t2);
211 A2(j, i + 2) = A2(j, i + 2) - sum * conj(t3);
212 }
213 // Apply reflector to Zc from the right
214 for (idx_t j = 0; j < n_block; ++j) {
215 sum =
216 Zc2(j, i) + v2 * Zc2(j, i + 1) + v3 * Zc2(j, i + 2);
217 Zc2(j, i) = Zc2(j, i) - sum * conj(t1);
218 Zc2(j, i + 1) = Zc2(j, i + 1) - sum * conj(t2);
219 Zc2(j, i + 2) = Zc2(j, i + 2) - sum * conj(t3);
220 }
221 }
222
223 // Calculate a reflector to move the bulge one position
224 v[0] = A2(i + 1, i);
225 v[1] = A2(i + 2, i);
226 v[2] = A2(i + 3, i);
228 A2(i + 1, i) = v[0];
229 A2(i + 2, i) = zero;
230 A2(i + 3, i) = zero;
231
232 // Apply reflector
233 {
234 t1 = conj(t1);
235 const TA v2 = v[1];
236 const TA t2 = t1 * v2;
237 const TA v3 = v[2];
238 const TA t3 = t1 * v[2];
239 TA sum;
240
241 // Apply reflector from the left to A
242 for (idx_t j = i + 1; j < n_block; ++j) {
243 sum = A2(i + 1, j) + conj(v2) * A2(i + 2, j) +
244 conj(v3) * A2(i + 3, j);
245 A2(i + 1, j) = A2(i + 1, j) - sum * t1;
246 A2(i + 2, j) = A2(i + 2, j) - sum * t2;
247 A2(i + 3, j) = A2(i + 3, j) - sum * t3;
248 }
249 // Apply reflector from the left to B
250 for (idx_t j = i + 1; j < n_block; ++j) {
251 sum = B2(i + 1, j) + conj(v2) * B2(i + 2, j) +
252 conj(v3) * B2(i + 3, j);
253 B2(i + 1, j) = B2(i + 1, j) - sum * t1;
254 B2(i + 2, j) = B2(i + 2, j) - sum * t2;
255 B2(i + 3, j) = B2(i + 3, j) - sum * t3;
256 }
257 // Apply reflector to Qc from the right
258 for (idx_t j = 0; j < n_block; ++j) {
259 sum = Qc2(j, i + 1) + v2 * Qc2(j, i + 2) +
260 v3 * Qc2(j, i + 3);
261 Qc2(j, i + 1) = Qc2(j, i + 1) - sum * conj(t1);
262 Qc2(j, i + 2) = Qc2(j, i + 2) - sum * conj(t2);
263 Qc2(j, i + 3) = Qc2(j, i + 3) - sum * conj(t3);
264 }
265 }
266 }
267 }
268 // Update rest of the matrix
269 idx_t istart_m, istop_m;
270 if (want_t) {
271 istart_m = 0;
272 istop_m = n;
273 }
274 else {
275 istart_m = ilo;
276 istop_m = ihi;
277 }
278 // Update A
279 if (ilo + n_shifts + 1 < istop_m) {
280 idx_t i = ilo + n_block;
281 while (i < istop_m) {
282 idx_t iblock = std::min<idx_t>(istop_m - i, ncols(WH));
283 auto A_slice =
284 slice(A, range{ilo, ilo + n_block}, range{i, i + iblock});
285 auto WH_slice = slice(WH, range{0, nrows(A_slice)},
286 range{0, ncols(A_slice)});
289 i = i + iblock;
290 }
291 }
292 if (istart_m < ilo) {
293 idx_t i = istart_m;
294 while (i < ilo) {
295 idx_t iblock = std::min<idx_t>(ilo - i, nrows(WV));
296 auto A_slice =
297 slice(A, range{i, i + iblock}, range{ilo, ilo + n_block});
298 auto WV_slice = slice(WV, range{0, nrows(A_slice)},
299 range{0, ncols(A_slice)});
302 i = i + iblock;
303 }
304 }
305 // Update B
306 if (ilo + n_shifts + 1 < istop_m) {
307 idx_t i = ilo + n_block;
308 while (i < istop_m) {
309 idx_t iblock = std::min<idx_t>(istop_m - i, ncols(WH));
310 auto B_slice =
311 slice(B, range{ilo, ilo + n_block}, range{i, i + iblock});
312 auto WH_slice = slice(WH, range{0, nrows(B_slice)},
313 range{0, ncols(B_slice)});
316 i = i + iblock;
317 }
318 }
319 if (istart_m < ilo) {
320 idx_t i = istart_m;
321 while (i < ilo) {
322 idx_t iblock = std::min<idx_t>(ilo - i, nrows(WV));
323 auto B_slice =
324 slice(B, range{i, i + iblock}, range{ilo, ilo + n_block});
325 auto WV_slice = slice(WV, range{0, nrows(B_slice)},
326 range{0, ncols(B_slice)});
329 i = i + iblock;
330 }
331 }
332 // Update Q
333 if (want_q) {
334 idx_t i = 0;
335 while (i < n) {
336 idx_t iblock = std::min<idx_t>(n - i, nrows(WV));
337 auto Q_slice =
338 slice(Q, range{i, i + iblock}, range{ilo, ilo + n_block});
339 auto WV_slice = slice(WV, range{0, nrows(Q_slice)},
340 range{0, ncols(Q_slice)});
343 i = i + iblock;
344 }
345 }
346 // Update Z
347 if (want_z) {
348 idx_t i = 0;
349 while (i < n) {
350 idx_t iblock = std::min<idx_t>(n - i, nrows(WV));
351 auto Z_slice =
352 slice(Z, range{i, i + iblock}, range{ilo, ilo + n_block});
353 auto WV_slice = slice(WV, range{0, nrows(Z_slice)},
354 range{0, ncols(Z_slice)});
357 i = i + iblock;
358 }
359 }
360
362 }
363
364 //
365 // The following code block moves the bulges down until they are low enough
366 // to be removed
367 //
368 while (i_pos_block + n_block_desired < ihi) {
369 // Number of positions each bulge will be moved down
370 idx_t n_pos = std::min<idx_t>(n_block_desired - n_shifts,
372 // Actual blocksize
373 idx_t n_block = n_shifts + n_pos;
374
375 auto Qc2 = slice(Qc, range{0, n_block}, range{0, n_block});
377 auto Zc2 = slice(Zc, range{0, n_block}, range{0, n_block});
379
380 // Near-the-diagonal bulge chase
381 // The calculations are initially limited to a small window
382 // The rest is updated later via level 3 BLAS
383 auto A2 = slice(A, range(i_pos_block, i_pos_block + 1 + n_block),
385 auto B2 = slice(B, range(i_pos_block, i_pos_block + 1 + n_block),
387
388 for (idx_t i_bulge = 0; i_bulge < n_bulges; i_bulge++) {
389 TA t1;
390 idx_t i2 = 2 * (n_bulges - i_bulge - 1);
391 for (idx_t i = i2; i < i2 + n_pos; ++i) {
392 // Remove fill-in from B using an inverse reflector
393 {
394 auto T = slice(B2, range{i + 1, i + 3}, range{i, i + 3});
395 inv_house3(T, v, t1);
396 }
397
398 // Apply the reflector
399 {
400 t1 = conj(t1);
401 const TA v2 = v[1];
402 const TA t2 = t1 * v2;
403 const TA v3 = v[2];
404 const TA t3 = t1 * v[2];
405 TA sum;
406
407 // Apply reflector from the right to B
408 for (idx_t j = 0; j < i + 3; ++j) {
409 sum = B2(j, i) + v2 * B2(j, i + 1) + v3 * B2(j, i + 2);
410 B2(j, i) = B2(j, i) - sum * conj(t1);
411 B2(j, i + 1) = B2(j, i + 1) - sum * conj(t2);
412 B2(j, i + 2) = B2(j, i + 2) - sum * conj(t3);
413 }
414 B2(i + 1, i) = (TA)0;
415 B2(i + 2, i) = (TA)0;
416 // Apply reflector from the right to A
417 for (idx_t j = 0; j < i + 4; ++j) {
418 sum = A2(j, i) + v2 * A2(j, i + 1) + v3 * A2(j, i + 2);
419 A2(j, i) = A2(j, i) - sum * conj(t1);
420 A2(j, i + 1) = A2(j, i + 1) - sum * conj(t2);
421 A2(j, i + 2) = A2(j, i + 2) - sum * conj(t3);
422 }
423 // Apply reflector to Zc from the right
424 for (idx_t j = 0; j < n_block; ++j) {
425 sum =
426 Zc2(j, i) + v2 * Zc2(j, i + 1) + v3 * Zc2(j, i + 2);
427 Zc2(j, i) = Zc2(j, i) - sum * conj(t1);
428 Zc2(j, i + 1) = Zc2(j, i + 1) - sum * conj(t2);
429 Zc2(j, i + 2) = Zc2(j, i + 2) - sum * conj(t3);
430 }
431 }
432
433 // Calculate a reflector to move the bulge one position
434 v[0] = A2(i + 1, i);
435 v[1] = A2(i + 2, i);
436 v[2] = A2(i + 3, i);
438 A2(i + 1, i) = v[0];
439 A2(i + 2, i) = zero;
440 A2(i + 3, i) = zero;
441
442 // Apply reflector
443 {
444 t1 = conj(t1);
445 const TA v2 = v[1];
446 const TA t2 = t1 * v2;
447 const TA v3 = v[2];
448 const TA t3 = t1 * v[2];
449 TA sum;
450
451 // Apply reflector from the left to A
452 for (idx_t j = i + 1; j < n_block; ++j) {
453 sum = A2(i + 1, j) + conj(v2) * A2(i + 2, j) +
454 conj(v3) * A2(i + 3, j);
455 A2(i + 1, j) = A2(i + 1, j) - sum * t1;
456 A2(i + 2, j) = A2(i + 2, j) - sum * t2;
457 A2(i + 3, j) = A2(i + 3, j) - sum * t3;
458 }
459 // Apply reflector from the left to B
460 for (idx_t j = i + 1; j < n_block; ++j) {
461 sum = B2(i + 1, j) + conj(v2) * B2(i + 2, j) +
462 conj(v3) * B2(i + 3, j);
463 B2(i + 1, j) = B2(i + 1, j) - sum * t1;
464 B2(i + 2, j) = B2(i + 2, j) - sum * t2;
465 B2(i + 3, j) = B2(i + 3, j) - sum * t3;
466 }
467 // Apply reflector to Qc from the right
468 for (idx_t j = 0; j < n_block; ++j) {
469 sum =
470 Qc2(j, i) + v2 * Qc2(j, i + 1) + v3 * Qc2(j, i + 2);
471 Qc2(j, i) = Qc2(j, i) - sum * conj(t1);
472 Qc2(j, i + 1) = Qc2(j, i + 1) - sum * conj(t2);
473 Qc2(j, i + 2) = Qc2(j, i + 2) - sum * conj(t3);
474 }
475 }
476 }
477 }
478 // Update rest of the matrix
479 idx_t istart_m, istop_m;
480 if (want_t) {
481 istart_m = 0;
482 istop_m = n;
483 }
484 else {
485 istart_m = ilo;
486 istop_m = ihi;
487 }
488 // Update A
489 if (i_pos_block + n_block < istop_m) {
490 idx_t i = i_pos_block + n_block;
491 while (i < istop_m) {
492 idx_t iblock = std::min<idx_t>(istop_m - i, ncols(WH));
493 auto A_slice =
494 slice(A, range{i_pos_block + 1, i_pos_block + 1 + n_block},
495 range{i, i + iblock});
496 auto WH_slice = slice(WH, range{0, nrows(A_slice)},
497 range{0, ncols(A_slice)});
500 i = i + iblock;
501 }
502 }
503 if (istart_m < i_pos_block) {
504 idx_t i = istart_m;
505 while (i < i_pos_block) {
506 idx_t iblock = std::min<idx_t>(i_pos_block - i, nrows(WV));
507 auto A_slice = slice(A, range{i, i + iblock},
509 auto WV_slice = slice(WV, range{0, nrows(A_slice)},
510 range{0, ncols(A_slice)});
513 i = i + iblock;
514 }
515 }
516 // Update B
517 if (i_pos_block + n_block < istop_m) {
518 idx_t i = i_pos_block + n_block;
519 while (i < istop_m) {
520 idx_t iblock = std::min<idx_t>(istop_m - i, ncols(WH));
521 auto B_slice =
522 slice(B, range{i_pos_block + 1, i_pos_block + 1 + n_block},
523 range{i, i + iblock});
524 auto WH_slice = slice(WH, range{0, nrows(B_slice)},
525 range{0, ncols(B_slice)});
528 i = i + iblock;
529 }
530 }
531 if (istart_m < i_pos_block) {
532 idx_t i = istart_m;
533 while (i < i_pos_block) {
534 idx_t iblock = std::min<idx_t>(i_pos_block - i, nrows(WV));
535 auto B_slice = slice(B, range{i, i + iblock},
537 auto WV_slice = slice(WV, range{0, nrows(B_slice)},
538 range{0, ncols(B_slice)});
541 i = i + iblock;
542 }
543 }
544 // Update Q
545 if (want_q) {
546 idx_t i = 0;
547 while (i < n) {
548 idx_t iblock = std::min<idx_t>(n - i, nrows(WV));
549 auto Q_slice =
550 slice(Q, range{i, i + iblock},
552 auto WV_slice = slice(WV, range{0, nrows(Q_slice)},
553 range{0, ncols(Q_slice)});
556 i = i + iblock;
557 }
558 }
559 // Update Z
560 if (want_z) {
561 idx_t i = 0;
562 while (i < n) {
563 idx_t iblock = std::min<idx_t>(n - i, nrows(WV));
564 auto Z_slice = slice(Z, range{i, i + iblock},
566 auto WV_slice = slice(WV, range{0, nrows(Z_slice)},
567 range{0, ncols(Z_slice)});
570 i = i + iblock;
571 }
572 }
573
575 }
576
577 //
578 // The following code removes the bulges from the matrix
579 //
580 {
581 // Actual blocksize
582 idx_t n_block = ihi - i_pos_block;
583
584 auto Qc2 = slice(Qc, range{0, n_block}, range{0, n_block});
586 auto Zc2 = slice(Zc, range{0, n_block}, range{0, n_block});
588
589 // The calculations are initially limited to a small window
590 // The rest is updated later via level 3 BLAS
591 auto A2 = slice(A, range(i_pos_block, ihi), range(i_pos_block, ihi));
592 auto B2 = slice(B, range(i_pos_block, ihi), range(i_pos_block, ihi));
593
594 for (idx_t i_bulge = 0; i_bulge < n_bulges; i_bulge++) {
595 TA t1;
596 idx_t i2 = 2 * (n_bulges - i_bulge - 1);
597 for (idx_t i = i2; i < n_block - 1; ++i) {
598 // Remove fill-in from B using an inverse reflector
599 if (i + 2 < n_block) {
600 auto T = slice(B2, range{i + 1, i + 3}, range{i, i + 3});
601 inv_house3(T, v, t1);
602
603 // Apply the reflector
604 {
605 t1 = conj(t1);
606 const TA v2 = v[1];
607 const TA t2 = t1 * v2;
608 const TA v3 = v[2];
609 const TA t3 = t1 * v[2];
610 TA sum;
611
612 // Apply reflector from the right to B
613 for (idx_t j = 0; j < i + 3; ++j) {
614 sum = B2(j, i) + v2 * B2(j, i + 1) +
615 v3 * B2(j, i + 2);
616 B2(j, i) = B2(j, i) - sum * conj(t1);
617 B2(j, i + 1) = B2(j, i + 1) - sum * conj(t2);
618 B2(j, i + 2) = B2(j, i + 2) - sum * conj(t3);
619 }
620 B2(i + 1, i) = (TA)0;
621 B2(i + 2, i) = (TA)0;
622 // Apply reflector from the right to A
623 for (idx_t j = 0; j < min(n_block, i + 4); ++j) {
624 sum = A2(j, i) + v2 * A2(j, i + 1) +
625 v3 * A2(j, i + 2);
626 A2(j, i) = A2(j, i) - sum * conj(t1);
627 A2(j, i + 1) = A2(j, i + 1) - sum * conj(t2);
628 A2(j, i + 2) = A2(j, i + 2) - sum * conj(t3);
629 }
630 // Apply reflector to Zc from the right
631 for (idx_t j = 0; j < n_block; ++j) {
632 sum = Zc2(j, i) + v2 * Zc2(j, i + 1) +
633 v3 * Zc2(j, i + 2);
634 Zc2(j, i) = Zc2(j, i) - sum * conj(t1);
635 Zc2(j, i + 1) = Zc2(j, i + 1) - sum * conj(t2);
636 Zc2(j, i + 2) = Zc2(j, i + 2) - sum * conj(t3);
637 }
638 }
639 }
640 else {
641 // For the final 2x2 block, use a rotation instead of a
642 // reflector
643 real_t c2;
644 TA s2;
645 rotg(B2(i + 1, i + 1), B2(i + 1, i), c2, s2);
646 s2 = -s2;
647 B2(i + 1, i) = (TA)0.;
648
649 // Apply rotation
650 {
651 auto b1 = slice(B2, range{0, i + 1}, i);
652 auto b2 = slice(B2, range{0, i + 1}, i + 1);
653 rot(b1, b2, c2, conj(s2));
654 auto a1 = col(A2, i);
655 auto a2 = col(A2, i + 1);
656 rot(a1, a2, c2, conj(s2));
657 if (want_z) {
658 auto z1 = col(Zc2, i);
659 auto z2 = col(Zc2, i + 1);
660 rot(z1, z2, c2, conj(s2));
661 }
662 }
663 }
664
665 // Calculate a reflector to move the bulge one position
666 if (i + 2 < n_block) {
667 if (i + 3 < n_block) {
668 // Normal size bulge
669 v[0] = A2(i + 1, i);
670 v[1] = A2(i + 2, i);
671 v[2] = A2(i + 3, i);
673 A2(i + 1, i) = v[0];
674 A2(i + 2, i) = zero;
675 A2(i + 3, i) = zero;
676
677 // Apply reflector
678 {
679 t1 = conj(t1);
680 const TA v2 = v[1];
681 const TA t2 = t1 * v2;
682 const TA v3 = v[2];
683 const TA t3 = t1 * v[2];
684 TA sum;
685
686 // Apply reflector from the left to A
687 for (idx_t j = i + 1; j < n_block; ++j) {
688 sum = A2(i + 1, j) + conj(v2) * A2(i + 2, j) +
689 conj(v3) * A2(i + 3, j);
690 A2(i + 1, j) = A2(i + 1, j) - sum * t1;
691 A2(i + 2, j) = A2(i + 2, j) - sum * t2;
692 A2(i + 3, j) = A2(i + 3, j) - sum * t3;
693 }
694 // Apply reflector from the left to B
695 for (idx_t j = i + 1; j < n_block; ++j) {
696 sum = B2(i + 1, j) + conj(v2) * B2(i + 2, j) +
697 conj(v3) * B2(i + 3, j);
698 B2(i + 1, j) = B2(i + 1, j) - sum * t1;
699 B2(i + 2, j) = B2(i + 2, j) - sum * t2;
700 B2(i + 3, j) = B2(i + 3, j) - sum * t3;
701 }
702 // Apply reflector to Qc from the right
703 for (idx_t j = 0; j < n_block; ++j) {
704 sum = Qc2(j, i + 1) + v2 * Qc2(j, i + 2) +
705 v3 * Qc2(j, i + 3);
706 Qc2(j, i + 1) = Qc2(j, i + 1) - sum * conj(t1);
707 Qc2(j, i + 2) = Qc2(j, i + 2) - sum * conj(t2);
708 Qc2(j, i + 3) = Qc2(j, i + 3) - sum * conj(t3);
709 }
710 }
711 }
712 else {
713 // Small 2x2 bulge
714 v[0] = A2(i + 1, i);
715 v[1] = A2(i + 2, i);
716 v[2] = zero;
718 A2(i + 1, i) = v[0];
719 A2(i + 2, i) = zero;
720
721 // Apply reflector
722 {
723 t1 = conj(t1);
724 const TA v2 = v[1];
725 const TA t2 = t1 * v2;
726 TA sum;
727
728 // Apply reflector from the left to A
729 for (idx_t j = i + 1; j < n_block; ++j) {
730 sum = A2(i + 1, j) + conj(v2) * A2(i + 2, j);
731 A2(i + 1, j) = A2(i + 1, j) - sum * t1;
732 A2(i + 2, j) = A2(i + 2, j) - sum * t2;
733 }
734 // Apply reflector from the left to B
735 for (idx_t j = i + 1; j < n_block; ++j) {
736 sum = B2(i + 1, j) + conj(v2) * B2(i + 2, j);
737 B2(i + 1, j) = B2(i + 1, j) - sum * t1;
738 B2(i + 2, j) = B2(i + 2, j) - sum * t2;
739 }
740 // Apply reflector to Qc from the right
741 for (idx_t j = 0; j < n_block; ++j) {
742 sum = Qc2(j, i + 1) + v2 * Qc2(j, i + 2);
743 Qc2(j, i + 1) = Qc2(j, i + 1) - sum * conj(t1);
744 Qc2(j, i + 2) = Qc2(j, i + 2) - sum * conj(t2);
745 }
746 }
747 }
748 }
749 }
750 }
751 // Update rest of the matrix
752 idx_t istart_m, istop_m;
753 if (want_t) {
754 istart_m = 0;
755 istop_m = n;
756 }
757 else {
758 istart_m = ilo;
759 istop_m = ihi;
760 }
761 // Update A
762 if (i_pos_block + n_block < istop_m) {
763 idx_t i = i_pos_block + n_block;
764 while (i < istop_m) {
765 idx_t iblock = std::min<idx_t>(istop_m - i, ncols(WH));
766 auto A_slice =
768 range{i, i + iblock});
769 auto WH_slice = slice(WH, range{0, nrows(A_slice)},
770 range{0, ncols(A_slice)});
773 i = i + iblock;
774 }
775 }
776 if (istart_m < i_pos_block) {
777 idx_t i = istart_m;
778 while (i < i_pos_block) {
779 idx_t iblock = std::min<idx_t>(i_pos_block - i, nrows(WV));
780 auto A_slice = slice(A, range{i, i + iblock},
782 auto WV_slice = slice(WV, range{0, nrows(A_slice)},
783 range{0, ncols(A_slice)});
786 i = i + iblock;
787 }
788 }
789 // Update B
790 if (i_pos_block + n_block < istop_m) {
791 idx_t i = i_pos_block + n_block;
792 while (i < istop_m) {
793 idx_t iblock = std::min<idx_t>(istop_m - i, ncols(WH));
794 auto B_slice =
796 range{i, i + iblock});
797 auto WH_slice = slice(WH, range{0, nrows(B_slice)},
798 range{0, ncols(B_slice)});
801 i = i + iblock;
802 }
803 }
804 if (istart_m < i_pos_block) {
805 idx_t i = istart_m;
806 while (i < i_pos_block) {
807 idx_t iblock = std::min<idx_t>(i_pos_block - i, nrows(WV));
808 auto B_slice = slice(B, range{i, i + iblock},
810 auto WV_slice = slice(WV, range{0, nrows(B_slice)},
811 range{0, ncols(B_slice)});
814 i = i + iblock;
815 }
816 }
817 // Update Q
818 if (want_q) {
819 idx_t i = 0;
820 while (i < n) {
821 idx_t iblock = std::min<idx_t>(n - i, nrows(WV));
822 auto Q_slice = slice(Q, range{i, i + iblock},
824 auto WV_slice = slice(WV, range{0, nrows(Q_slice)},
825 range{0, ncols(Q_slice)});
828 i = i + iblock;
829 }
830 }
831 // Update Z
832 if (want_z) {
833 idx_t i = 0;
834 while (i < n) {
835 idx_t iblock = std::min<idx_t>(n - i, nrows(WV));
836 auto Z_slice = slice(Z, range{i, i + iblock},
838 auto WV_slice = slice(WV, range{0, nrows(Z_slice)},
839 range{0, ncols(Z_slice)});
842 i = i + iblock;
843 }
844 }
845 }
846}
847
848} // namespace tlapack
849
850#endif // TLAPACK_QZ_SWEEP_HH
constexpr internal::Forward FORWARD
Forward direction.
Definition types.hpp:376
constexpr internal::GeneralAccess GENERAL
General access.
Definition types.hpp:175
constexpr internal::ConjTranspose CONJ_TRANS
conjugate transpose
Definition types.hpp:259
constexpr internal::ColumnwiseStorage COLUMNWISE_STORAGE
Columnwise storage.
Definition types.hpp:409
constexpr internal::NoTranspose NO_TRANS
no transpose
Definition types.hpp:255
constexpr T conj(const T &x) noexcept
Extends std::conj() to real datatypes.
Definition utils.hpp:100
#define TLAPACK_SMATRIX
Macro for tlapack::concepts::SliceableMatrix compatible with C++17.
Definition concepts.hpp:899
#define TLAPACK_VECTOR
Macro for tlapack::concepts::Vector compatible with C++17.
Definition concepts.hpp:906
void multishift_QZ_sweep(bool want_t, bool want_q, bool want_z, size_type< matrix_t > ilo, size_type< matrix_t > ihi, matrix_t &A, matrix_t &B, const alpha_t &alpha, const beta_t &beta, matrix_t &Q, matrix_t &Z)
multishift_QR_sweep performs a single small-bulge multi-shift QR sweep.
Definition multishift_qz_sweep.hpp:63
void laset(uplo_t uplo, const type_t< matrix_t > &alpha, const type_t< matrix_t > &beta, matrix_t &A)
Initializes a matrix to diagonal and off-diagonal values.
Definition laset.hpp:38
void larfg(storage_t storeMode, type_t< vector_t > &alpha, vector_t &x, type_t< vector_t > &tau)
Generates a elementary Householder reflection.
Definition larfg.hpp:73
void lacpy(uplo_t uplo, const matrixA_t &A, matrixB_t &B)
Copies a matrix from A to B.
Definition lacpy.hpp:38
void inv_house3(const matrix_t &A, vector_t &v, type_t< vector_t > &tau)
Inv_house calculates a reflector to reduce the first column in a 2x3 matrix A from the right to zero.
Definition inv_house3.hpp:44
int lahqz_shiftcolumn(const matrix_t &A, const matrix_t &B, vector_t &v, complex_type< type_t< matrix_t > > s1, complex_type< type_t< matrix_t > > s2, type_t< matrix_t > beta1, type_t< matrix_t > beta2)
Given a 2-by-2 or 3-by-3 matrix pencil (A,B), lahqz_shiftcolumn calculates a multiple of the product:...
Definition lahqz_shiftcolumn.hpp:56
void rotg(T &a, T &b, T &c, T &s)
Construct plane rotation that eliminates b, such that:
Definition rotg.hpp:39
void rot(vectorX_t &x, vectorY_t &y, const c_type &c, const s_type &s)
Apply plane rotation:
Definition rot.hpp:44
void gemm(Op transA, Op transB, const alpha_t &alpha, const matrixA_t &A, const matrixB_t &B, const beta_t &beta, matrixC_t &C)
General matrix-matrix multiply:
Definition gemm.hpp:61
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113