<T>LAPACK 0.1.2
C++ Template Linear Algebra PACKage
Loading...
Searching...
No Matches
laed6.hpp
Go to the documentation of this file.
1
3//
4// Copyright (c) 2025, University of Colorado Denver. All rights reserved.
5//
6// This file is part of <T>LAPACK.
7// <T>LAPACK is free software: you can redistribute it and/or modify it under
8// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
9
10#ifndef TLAPACK_LAED6_HH
11#define TLAPACK_LAED6_HH
12
13//
15
16namespace tlapack {
17
77template <class d_t, class z_t, class real_t, class idx_t>
78int laed6(idx_t kniter,
79 bool& orgati,
80 real_t rho,
81 d_t& d,
82 z_t& z,
84 real_t& tau)
85{
86 idx_t niter;
87 real_t lbd, ubd, temp, temp1, temp2, temp3, temp4, a, b, c, eta;
89 real_t maxit = real_t(40);
90 int info = 0;
91
92 if (orgati) {
93 lbd = d[1];
94 ubd = d[2];
95 }
96 else {
97 lbd = d[0];
98 ubd = d[1];
99 }
100
101 if (finit < 0) {
102 lbd = real_t(0.0);
103 }
104 else {
105 ubd = real_t(0.0);
106 }
107
108 niter = 0;
109 tau = real_t(0.0);
110 if (kniter == 1) {
111 if (orgati) {
112 temp = (d[2] - d[1]) / real_t(2.0);
113 c = rho + z[0] / ((d[0] - d[1]) - temp);
114 a = c * (d[1] + d[2]) + z[1] + z[2];
115 b = c * d[1] * d[2] + z[1] * d[2] + z[2] * d[1];
116 }
117 else {
118 temp = (d[0] - d[1]) / real_t(2.0);
119 c = rho + z[2] / ((d[2] - d[1]) - temp);
120 a = c * (d[0] + d[1]) + z[0] + z[1];
121 b = c * d[0] * d[1] + z[0] * d[1] + z[1] * d[0];
122 }
123 temp = max(max(abs(a), abs(b)), abs(c));
124 a = a / temp;
125 b = b / temp;
126 c = c / temp;
127 if (c == 0) {
128 tau = b / a;
129 }
130 else if (a <= 0.0) {
131 tau = (a - sqrt(abs(a * a - real_t(4.0) * b * c))) /
132 (real_t(2.0) * c);
133 }
134 else {
135 tau =
136 real_t(2.0) * b / (a + sqrt(abs(a * a - real_t(4.0) * b * c)));
137 }
138
140 tau = (lbd + ubd) / real_t(2.0);
141 }
142
143 if (d[0] == tau || d[1] == tau || d[2] == tau) {
144 tau = real_t(0.0);
145 }
146 else {
147 temp = finit + tau * z[0] / (d[0] * (d[0] - tau)) +
148 tau * z[1] / (d[1] * (d[1] - tau)) +
149 tau * z[2] / (d[2] * (d[2] - tau));
150
151 if (temp <= 0.0) {
152 lbd = tau;
153 }
154 else {
155 ubd = tau;
156 }
157 if (abs(finit) <= abs(temp)) {
158 tau = real_t(0.0);
159 }
160 }
161 }
162
163 // get machine parameters for possible scaling to avoid overflow
164
165 // modified by Sven: parameters SMALL1, SMINV1, SMALL2,
166 // SMINV2, EPS are not SAVEd anymore between one call to the
167 // others but recomputed at each call
168
169 const int base = 2;
171 real_t small1 = pow(base, log(safmin) / log(real_t(base)) / real_t(3.0));
172 real_t sminv1 = real_t(1.0) / small1;
176 std::vector<real_t> dscale(3);
177 std::vector<real_t> zscale(3);
178
179 // Determine if scaling of inputs necessary to avoid overflow when computing
180 // 1/TEMP**3
181
182 if (orgati) {
183 temp = min(abs(d[1] - tau), abs(d[2] - tau));
184 }
185 else {
186 temp = min(abs(d[0] - tau), abs(d[1] - tau));
187 }
188
189 bool scale = false;
190 if (temp <= small1) {
191 scale = true;
192 if (temp <= small2) {
193 // Scale up by power of radix nearest 1/SAFMIN**(2/3)
194 sclfac = sminv2;
195 sclinv = small2;
196 }
197 else {
198 // Scale up by power of radix nearest 1/SAFMIN**(1/3)
199 sclfac = sminv1;
200 sclinv = small1;
201 }
202
203 for (idx_t i = 0; i < 3; i++) {
204 dscale[i] = d[i] * sclfac;
205 zscale[i] = z[i] * sclfac;
206 }
207
208 tau = tau * sclfac;
209 lbd = lbd * sclfac;
210 ubd = ubd * sclfac;
211 }
212 else {
213 // Copy D and Z to DSCALE and ZSCALE
214 for (idx_t i = 0; i < 3; i++) {
215 dscale[i] = d[i];
216 zscale[i] = z[i];
217 }
218 }
219
220 real_t fc = real_t(0.0);
221 real_t df = real_t(0.0);
222 real_t ddf = real_t(0.0);
223
224 for (idx_t i = 0; i < 3; i++) {
225 temp = real_t(1.0) / (dscale[i] - tau);
226 temp1 = zscale[i] * temp;
227 temp2 = temp1 * temp;
228 temp3 = temp2 * temp;
229 fc = fc + temp1 / dscale[i];
230 df = df + temp2;
231 ddf = ddf + temp3;
232 }
233
234 real_t f = finit + tau * fc;
235 bool converge = false;
236
237 if (abs(f) <= 0.0) {
238 converge = true;
239 }
240 if (!converge) {
241 if (f <= 0.0) {
242 lbd = tau;
243 }
244 else {
245 ubd = tau;
246 }
247 }
248
249 // Iteration begins -- Use Gragg-Thornton-Warner cubic convergent scheme
250
251 // It is not hard to see that
252
253 // 1) Iterations will go up monotonically
254 // if FINIT < 0;
255
256 // 2) Iterations will go down monotonically
257 // if FINIT > 0.
258
259 idx_t iter = niter + 1;
260
261 while (iter < maxit) {
262 if (converge) {
263 break;
264 }
265
266 if (orgati) {
267 temp1 = dscale[1] - tau;
268 temp2 = dscale[2] - tau;
269 }
270 else {
271 temp1 = dscale[0] - tau;
272 temp2 = dscale[1] - tau;
273 }
274
275 a = (temp1 + temp2) * f - temp1 * temp2 * df;
276 b = temp1 * temp2 * f;
277 c = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
278 temp = max(max(abs(a), abs(b)), abs(c));
279 a = a / temp;
280 b = b / temp;
281 c = b / temp;
282
283 if (c == 0.0) {
284 eta = b / a;
285 }
286 else if (a <= 0.0) {
287 eta = (a - sqrt(abs(a * a - real_t(4.0) * b * c))) /
288 (real_t(2.0) * c);
289 }
290 else {
291 eta =
292 real_t(2.0) * b / (a + sqrt(abs(a * a - real_t(4.0) * b * c)));
293 }
294
295 if (f * eta >= 0.0) {
296 eta = -f / df;
297 }
298
299 tau = tau + eta;
301 tau = (lbd + ubd) / real_t(2.0);
302 }
303
304 fc = real_t(0.0);
305 real_t err = real_t(0.0);
306 df = real_t(0.0);
307 ddf = real_t(0.0);
308
309 for (idx_t i = 0; i < 3; i++) {
310 if ((dscale[i] - tau) != 0) {
311 temp = real_t(1.0) / (dscale[i] - tau);
312 temp1 = zscale[i] * temp;
313 temp2 = temp1 * temp;
314 temp3 = temp2 * temp;
315 temp4 = temp1 / dscale[i];
316 fc = fc + temp4;
317 err = err + abs(temp4);
318 df = df + temp2;
319 ddf = ddf + temp3;
320 }
321 else {
322 converge = true;
323 break;
324 }
325 }
326
327 if (!converge) {
328 f = finit + tau * fc;
329 err = real_t(8.0) * (abs(finit) + abs(tau) * err) + abs(tau) * df;
330
331 if ((abs(f) <= real_t(4.0) * eps * err) ||
332 ((ubd - lbd) <= real_t(4.0) * eps * abs(tau))) {
333 converge = true;
334 break;
335 }
336
337 if (f <= 0.0) {
338 lbd = tau;
339 }
340 else {
341 ubd = tau;
342 }
343 }
344 else {
345 break;
346 }
347
348 iter++;
349 }
350
351 if (!converge) {
352 info = 1;
353 }
354
355 // Undo scaling
356 if (scale) {
357 tau = tau * sclinv;
358 }
359
360 return info;
361}
362} // namespace tlapack
363
364#endif // TLAPACK_LAED6_HH
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D' ).
Definition arrayTraits.hpp:15
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113
int laed6(idx_t kniter, bool &orgati, real_t rho, d_t &d, z_t &z, real_t &finit, real_t &tau)
LAED6 used by STEDC.
Definition laed6.hpp:78