<T>LAPACK 0.1.2
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
laed2.hpp
Go to the documentation of this file.
1
3//
4// Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved.
5//
6// This file is part of <T>LAPACK.
7// <T>LAPACK is free software: you can redistribute it and/or modify it under
8// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
9
10// TODO: Need to finished laed2
11
12#ifndef TLAPACK_LAED2_HH
13#define TLAPACK_LAED2_HH
14
15//
17
18//
19#include <tlapack/blas/copy.hpp>
21#include <tlapack/blas/rot.hpp>
22#include <tlapack/blas/scal.hpp>
27
28namespace tlapack {
29
30template <class idx_t,
31 class matrixQ_t,
32 class d_t,
33 class indxq_t,
34 class real_t,
35 class z_t,
36 class dlambda_t,
37 class w_t,
38 class matrixQ2_t,
39 class indx_t,
40 class indxc_t,
41 class indxp_t,
42 class coltyp_t>
43int laed2(idx_t& k,
44 idx_t n,
45 idx_t n1,
46 d_t& d,
47 matrixQ_t& q,
48 idx_t ldq,
50 real_t& rho,
51 z_t z,
53 w_t& w,
55 indx_t& indx,
59{
60 std::cout << std::setprecision(15);
62 // Functor
64
65 int info = 0;
66
67 if (n < 0) {
68 info = -2;
69 }
70 else if (ldq < max(idx_t(1), n)) {
71 info = -6;
72 }
73 else if (min(idx_t(1), (n / 2)) > n1 || (n / 2) < n1) {
74 info = -3;
75 }
76 if (info != 0) {
77 // Call Xerbla
78 return info;
79 }
80
81 // Quick Return if Possible
82 if (n == 0) {
83 return info;
84 }
85
86 idx_t n2 = n - n1;
87 idx_t n1p1 = n1 + 1;
88
89 if (rho < 0) {
90 auto zView = slice(z, range(n1p1 - 1, n));
91 scal(real_t(-1.0), zView);
92 }
93
94 // Normalize z so that norm(z) = 1. Since z is the concatenation of a two
95 // normalized vectors, norm2(z) = sqrt(2).
96 real_t t = real_t(1.0 / sqrt(2.0));
97 scal(t, z);
98
99 // RHO = ABS(norm(z)**2 * RHO)
100 rho = abs(real_t(2.0) * rho);
101
102 // Sort the eigenvalues into increasing order
103 for (idx_t i = n1p1 - 1; i < n; i++) {
104 indxq[i] = indxq[i] + n1;
105 std::cout << "INDXQ[I] =" << indxq[i] << std::endl;
106 }
107
108 // re-integrate the dflated parts from the last pass
109 for (idx_t i = 0; i < n; i++) {
110 dlambda[i] = d[indxq[i]];
111 std::cout << "DLAMBDA[I] = " << dlambda[i] << std::endl;
112 }
113 lamrg(n1, n2, dlambda, 1, 1, indxc);
114 for (idx_t i = 0; i < n; i++) {
115 indx[i] = indxq[indxc[i]];
116 std::cout << "INDXQ[INDXC[I]] = " << indxq[indxc[i]] << std::endl;
117 }
118
119 // Calculate the allowable deflation tolerance
120 idx_t imax = iamax(z);
121 idx_t jmax = iamax(d);
122
123 real_t eps = ulp<real_t>() / real_t(2.);
124
125 real_t tol = real_t(8.0) * eps * max(abs(d[jmax]), abs(z[imax]));
126
127 // If the rank-1 modifier is small enough, everything is already deflated,
128 // we set k to zero, no more needs to be done except to reorganize Q so that
129 // its columns correspond with the elements in D.
130
131 if (rho * abs(z[imax]) <= tol) {
132 k = 0;
133 idx_t iq2 = 0;
134 for (idx_t j = 0; j < n; j++) {
135 idx_t i = indx[j];
136
137 auto qView = slice(q, range(0, n), i);
138 auto q2View = slice(q2, range(iq2, n));
139 copy(qView, q2View);
140
141 dlambda[j] = d[i];
142 iq2 = iq2 + n;
143 }
144
145 auto q2View = new_matrix(q2, n, n);
147 copy(dlambda, d);
148 return info;
149 }
150
151 // If there are multiple eigenvalues then the problem deflates. Here the
152 // number of equal eigenvalues are found. As each equal eigenvalue is
153 // found, an elementary reflector is computed to rotate the corresponding
154 // eigensubspace so that the corresponding components of Z are zero in this
155 // new basis.
156
157 for (idx_t i = 0; i < n1; i++) {
158 coltyp[i] = real_t(0);
159 }
160
161 for (idx_t i = n1p1 - 1; i < n; i++) {
162 coltyp[i] = real_t(2);
163 }
164
165 k = 0;
166 idx_t k2 = n;
167 idx_t pj = 0;
168 real_t nj;
169 bool deflate = false;
170 for (idx_t j = 0; j < n; j++) {
171 nj = indx[j];
172 if (rho * abs(z[nj]) <= tol) {
173 // Deflate due to small z component
174
175 k2 = k2 - 1;
176 coltyp[nj] = real_t(3.0);
177 indxp[k2] = nj;
178 if (j == n - 1) {
179 deflate = true;
180 }
181 }
182 else {
183 pj = nj;
184 }
185
186 std::cout << "NJ = " << nj << std::endl;
187 std::cout << "J = " << j << std::endl;
188 std::cout << "K = " << k << std::endl;
189 std::cout << "K2 = " << k2 << std::endl;
190 std::cout << "PJ = " << pj << std::endl;
191
192 // // LINE 80
193 // if (!deflate) {
194 // if (j < n) {
195 // nj = indx[j + 1];
196 // }
197 // else {
198 // nj = 0;
199 // }
200 // std::cout << "NJ = " << nj << std::endl;
201 // if (j > n - 1) {
202 // break;
203 // }
204 // if (rho * abs(z[nj]) <= tol) {
205 // // Deflate due to small z component
206
207 // k2 = k2 - 1;
208 // coltyp[nj] = real_t(3);
209 // indxp[k2] = nj;
210 // }
211 // else {
212 // // Check fi eign values are close enought to allow deflation
213 // real_t s = z[pj];
214 // real_t c = z[nj];
215
216 // // Find sqrt(a**2 + b**2) without overflow or destructive
217 // // underflow
218 // auto tau = lapy2(c, s);
219 // t = d[nj] - d[pj];
220 // c = c / tau;
221 // s = -s / tau;
222
223 // if (abs(t * c * s) <= tol) {
224 // // Deflation is possible
225
226 // z[nj] = tau;
227 // z[pj] = real_t(0.0);
228 // if (coltyp[nj] != coltyp[pj]) {
229 // coltyp[nj] = real_t(0);
230 // }
231 // coltyp[pj] = real_t(0);
232
233 // auto qView1 = slice(q, range(0, n), pj);
234 // auto qView2 = slice(q, range(0, n), nj);
235 // rot(qView1, qView2, c, s);
236
237 // t = d[pj] * (c * c) + d[nj] * (s * s);
238 // d[nj] = d[pj] * (s * s) + d[nj] * (c * c);
239 // d[pj] = t;
240 // k2 = k2 - 1;
241 // real_t i = 0;
242 // while (true) {
243 // if (k2 + i < n) {
244 // if (d[pj] < d[indxp[k2 + i]]) {
245 // indxp[k2 + i - 1] = indxp[k2 + i];
246 // indxp[k2 + i] = pj;
247 // i = i + 1;
248 // continue;
249 // }
250 // else {
251 // indxp[k2 + i - 1] = pj;
252 // }
253 // }
254 // else {
255 // indxp[k2 + i - 1] = pj;
256 // }
257
258 // pj = nj;
259 // break;
260 // }
261 // }
262 // else {
263 // dlambda[k - 1] = d[pj];
264 // w[k] = z[pj];
265 // indxp[k - 1] = pj;
266 // pj = nj;
267 // k = k + 1;
268 // }
269 // }
270 // }
271 }
272
274
276
277 // Recored the last eigenvalue
278 k = k + 1;
279 dlambda[k] = d[pj];
280 w[k] = z[pj];
281 indxp[k] = pj;
282
283 // Count up the total number of the various types of columns, then form
284 // a permutation which positions the four column types into four uniform
285 // groups (although one or more of these groups may be empty).
286
287 std::vector<real_t> ctot(4);
288
289 for (idx_t j = 0; j < 4; j++) {
290 ctot[j] = real_t(0);
291 }
292
293 for (idx_t j = 0; j < n; j++) {
294 idx_t ct = coltyp[j];
295 ctot[ct] = ctot[ct] + real_t(1);
296 }
297
298 std::vector<real_t> psm(4);
299 // // PSM(*) = Position in SubMatrix (of types 1 through 4)
300
301 psm[0] = real_t(0);
302 psm[1] = ctot[0];
303 psm[2] = psm[1] + ctot[1];
304 psm[3] = psm[2] + ctot[2];
305 k = n - ctot[3] - 1;
306
307 // Fill out the INDXC array so that the permutation which it induces will
308 // place all type-1 columns first, all type-2 columns next then all
309 // type-3's, and finally all type-4's.
310
311 for (idx_t j = 0; j < n; j++) {
312 idx_t js = indxp[j];
313 idx_t ct = coltyp[js];
314 indx[psm[ct]] = js;
315 indxc[psm[ct]] = j;
316 psm[ct] = psm[ct] + real_t(1);
317 }
318
319 // Sort the eigenvalues and corresponding eigenvectors into DLAMBDA and Q2
320 // respectively. The eigenvalues/vectors which were not deflated go
321 // into the first K slots of DLAMBDA and Q2 respectively while those
322 // which were deflated go into the last N - K slots.
323
324 idx_t i = 0;
325 idx_t iq1 = 0;
326 idx_t iq2 = (ctot[0] + ctot[1]) * n1;
327
328 for (idx_t j = 0; j < ctot[0]; j++) {
329 real_t js = indx[i];
330
331 auto qView = slice(q, range(0, n1), js);
332 auto q2View = slice(q2, range{iq1, iq1 + n1});
333 copy(qView, q2View);
334
335 z[i] = d[js];
336 i = i + 1;
337 iq1 = iq1 + n1;
338 }
339
340 for (idx_t j = 0; j < ctot[1]; j++) {
341 real_t js = indx[i];
342
343 auto qView = slice(q, range(0, n1), js);
344 auto q2View = slice(q2, range(iq1, iq1 + n1));
345 copy(qView, q2View);
346
347 qView = slice(q, range(n1, n1 + n2), js);
348 q2View = slice(q2, range(iq2, iq2 + n2));
349 copy(qView, q2View);
350
351 i = i + 1;
352 iq1 = iq1 + n1;
353 iq2 = iq2 + n2;
354 }
355
356 for (idx_t j = 0; j < ctot[2]; j++) {
357 real_t js = indx[i];
358
359 auto qView = slice(q, range(n1, n1 + n2), js);
360 auto q2View = slice(q2, range(iq2, iq2 + n2));
361 copy(qView, q2View);
362
363 z[i] = d[js];
364 i = i + 1;
365 iq2 = iq2 + n2;
366 }
367
368 iq1 = iq2;
369
370 for (idx_t j = 0; j < ctot[3]; j++) {
371 idx_t js = indx[i];
372 auto qView = slice(q, range(0, n), js);
373 iq2 = iq2 + n;
374 z[i] = d[js];
375 i = i + 1;
376 }
377
378 // The deflated eigenvalues and their corresponding vectors go back into
379 // the last N - K slots of D and Q respectively.
380
381 // if (k < n - 1) {
382 // auto q2Mat = new_matrix(q2, n, n);
383 // auto q2View = slice(q2Mat, range(iq1, n), ctot[3]);
384 // auto q1View = slice(q, range(0, n), k);
385 // lacpy(GENERAL, q2View, q1View);
386
387 // copy(slice(z, range{k + 1, n - k}), slice(d, range{k + 1, n - k}));
388 // }
389
390 // Copy CTOT into COLTYP for referencing in DLAED3.
391
392 for (idx_t j = 0; j < 4; j++) {
393 coltyp[j] = ctot[j];
394 }
395
396 return info;
397}
398} // namespace tlapack
399
400#endif // TLAPACK_LAED2_HH
void lacpy(uplo_t uplo, const matrixA_t &A, matrixB_t &B)
Copies a matrix from A to B.
Definition lacpy.hpp:38
void copy(const vectorX_t &x, vectorY_t &y)
Copy vector, .
Definition copy.hpp:31
size_type< vector_t > iamax(const vector_t &x, const IamaxOpts< abs_f > &opts)
Return .
Definition iamax.hpp:234
void scal(const alpha_t &alpha, vector_t &x)
Scale vector by constant, .
Definition scal.hpp:30
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D' ).
Definition arrayTraits.hpp:15
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
constexpr internal::GeneralAccess GENERAL
General access.
Definition types.hpp:180
void lamrg(idx_t n1, idx_t n2, a_t &a, int dtrd1, int dtrd2, idx1_t &index)
DLAMRG creates a permutation list to merge the entries of two independently sorted sets into a single...
Definition lamrg.hpp:64