tlapack/laed6_8hpp_source.html

//

// Copyright (c) 2025, University of Colorado Denver. All rights reserved.

//

// This file is part of <T>LAPACK.

// <T>LAPACK is free software: you can redistribute it and/or modify it under

// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.


#ifndef TLAPACK_LAED6_HH

#define TLAPACK_LAED6_HH


//

#include "tlapack/base/utils.hpp"


namespace tlapack {


/** Finds a root `tau` of the three-pole rational function
 *
 *      f(tau) = finit + tau * sum_{i=0..2} z[i] / ( d[i] * (d[i] - tau) )
 *
 * inside a bracket delimited by zero and one of the poles d[i].
 *
 * The routine optionally builds an initial guess from a quadratic model,
 * rescales the data when the guess lies very close to a pole, and then
 * refines `tau` with a safeguarded iteration (the in-code comments name it
 * the Gragg-Thornton-Warner cubically convergent scheme). Whenever an
 * iterate leaves the current bracket [lbd, ubd] it is replaced by the
 * bracket midpoint.
 *
 * NOTE(review): this appears to be the <T>LAPACK counterpart of LAPACK's
 * xLAED6 with zero-based indexing -- confirm against the reference
 * implementation.
 *
 * @param[in] kniter
 *      If equal to 1, an initial guess for `tau` is computed from a
 *      quadratic model; for any other value the iteration starts at 0.
 * @param[in] orgati
 *      Selects the pair of poles that brackets the root:
 *      true uses d[1] and d[2], false uses d[0] and d[1].
 *      Not modified by this routine.
 * @param[in] rho
 *      Scalar used only when building the initial guess (kniter == 1).
 * @param[in] d
 *      Indexable container; elements d[0], d[1], d[2] are read (the poles).
 * @param[in] z
 *      Indexable container; elements z[0], z[1], z[2] are read (the
 *      numerators).
 * @param[in] finit
 *      Value of f at tau = 0. Its sign decides which side of zero the
 *      bracket lies on. Not modified by this routine.
 * @param[out] tau
 *      On exit, the computed root (unscaled).
 *
 * @return 0 if a convergence test was satisfied,
 *         1 if the iteration limit was reached without convergence.
 */
template <class d_t, class z_t, class real_t, class idx_t>
int laed6(idx_t kniter,
          bool& orgati,
          real_t rho,
          d_t& d,
          z_t& z,
          real_t& finit,
          real_t& tau)
{
    const real_t zero(0.0);
    const real_t one(1.0);
    const real_t two(2.0);
    const real_t three(3.0);
    const real_t four(4.0);
    const real_t eight(8.0);

    const real_t eps = ulp<real_t>();

    // The iteration counter starts at 1, so at most maxit - 1 refinement
    // steps are performed.
    const idx_t maxit = 40;

    // Normalises the coefficients (qa, qb, qc) by their largest magnitude and
    // returns the root of  qc*x^2 - qa*x + qb = 0  chosen by the sign of qa
    // so that the numerator/denominator never suffers cancellation.
    // Both the initial guess and every refinement step use this same
    // formula; sharing it keeps the two call sites consistent.
    auto scaled_quadratic_root = [&](real_t qa, real_t qb,
                                     real_t qc) -> real_t {
        const real_t big = max(max(abs(qa), abs(qb)), abs(qc));
        qa = qa / big;
        qb = qb / big;
        qc = qc / big;

        if (qc == zero) {
            // Degenerate (linear) case.
            return qb / qa;
        }
        if (qa <= zero) {
            return (qa - sqrt(abs(qa * qa - four * qb * qc))) / (two * qc);
        }
        return two * qb / (qa + sqrt(abs(qa * qa - four * qb * qc)));
    };

    real_t temp, temp1, temp2, temp3, temp4, a, b, c, eta;

    // Bracket [lbd, ubd] for the root: first the two relevant poles ...
    real_t lbd, ubd;
    if (orgati) {
        lbd = d[1];
        ubd = d[2];
    }
    else {
        lbd = d[0];
        ubd = d[1];
    }

    // ... then tightened using the sign of f(0) = finit.
    if (finit < 0) {
        lbd = zero;
    }
    else {
        ubd = zero;
    }

    tau = zero;

    if (kniter == 1) {
        // Build an initial guess from a quadratic model of f.
        if (orgati) {
            temp = (d[2] - d[1]) / two;
            c = rho + z[0] / ((d[0] - d[1]) - temp);
            a = c * (d[1] + d[2]) + z[1] + z[2];
            b = c * d[1] * d[2] + z[1] * d[2] + z[2] * d[1];
        }
        else {
            temp = (d[0] - d[1]) / two;
            c = rho + z[2] / ((d[2] - d[1]) - temp);
            a = c * (d[0] + d[1]) + z[0] + z[1];
            b = c * d[0] * d[1] + z[0] * d[1] + z[1] * d[0];
        }

        tau = scaled_quadratic_root(a, b, c);

        // Safeguard: fall back to the midpoint if the guess left the bracket.
        if (tau < lbd || tau > ubd) {
            tau = (lbd + ubd) / two;
        }

        if (d[0] == tau || d[1] == tau || d[2] == tau) {
            // The guess hit a pole exactly; f cannot be evaluated there.
            tau = zero;
        }
        else {
            temp = finit + tau * z[0] / (d[0] * (d[0] - tau)) +
                   tau * z[1] / (d[1] * (d[1] - tau)) +
                   tau * z[2] / (d[2] * (d[2] - tau));

            if (temp <= zero) {
                lbd = tau;
            }
            else {
                ubd = tau;
            }

            // Keep the guess only if it improves on tau = 0.
            if (abs(finit) <= abs(temp)) {
                tau = zero;
            }
        }
    }

    // Get machine parameters for possible scaling to avoid overflow.
    //
    // modified by Sven: parameters SMALL1, SMINV1, SMALL2,
    // SMINV2, EPS are not SAVEd anymore between one call to the
    // others but recomputed at each call
    //
    // NOTE(review): reference LAPACK truncates the exponent below to an
    // integer so that the scale factors are exact powers of the radix; here
    // the exponent is left fractional. Confirm whether that is intentional.
    const int base = 2;
    const real_t safmin = safe_min<real_t>();
    real_t small1 = pow(base, log(safmin) / log(real_t(base)) / three);
    real_t sminv1 = one / small1;
    real_t small2 = small1 * small1;
    real_t sminv2 = sminv1 * sminv1;
    real_t sclfac, sclinv;
    std::vector<real_t> dscale(3);
    std::vector<real_t> zscale(3);

    // Determine if scaling of inputs necessary to avoid overflow when
    // computing 1/TEMP**3
    if (orgati) {
        temp = min(abs(d[1] - tau), abs(d[2] - tau));
    }
    else {
        temp = min(abs(d[0] - tau), abs(d[1] - tau));
    }

    bool scale = false;
    if (temp <= small1) {
        scale = true;
        if (temp <= small2) {
            // Scale up by power of radix nearest 1/SAFMIN**(2/3)
            sclfac = sminv2;
            sclinv = small2;
        }
        else {
            // Scale up by power of radix nearest 1/SAFMIN**(1/3)
            sclfac = sminv1;
            sclinv = small1;
        }

        for (idx_t i = 0; i < 3; i++) {
            dscale[i] = d[i] * sclfac;
            zscale[i] = z[i] * sclfac;
        }

        tau = tau * sclfac;
        lbd = lbd * sclfac;
        ubd = ubd * sclfac;
    }
    else {
        // Copy D and Z to DSCALE and ZSCALE
        for (idx_t i = 0; i < 3; i++) {
            dscale[i] = d[i];
            zscale[i] = z[i];
        }
    }

    // Evaluate f and the sums used as its first and second derivative terms
    // at the starting point.
    real_t fc = zero;
    real_t df = zero;
    real_t ddf = zero;

    for (idx_t i = 0; i < 3; i++) {
        temp = one / (dscale[i] - tau);
        temp1 = zscale[i] * temp;
        temp2 = temp1 * temp;
        temp3 = temp2 * temp;
        fc = fc + temp1 / dscale[i];
        df = df + temp2;
        ddf = ddf + temp3;
    }

    real_t f = finit + tau * fc;

    // An exact zero of f means the starting point is already the root.
    bool converge = (abs(f) <= zero);

    if (!converge) {
        if (f <= zero) {
            lbd = tau;
        }
        else {
            ubd = tau;
        }
    }

    // Iteration begins -- Use Gragg-Thornton-Warner cubic convergent scheme
    //
    // It is not hard to see that
    //
    // 1) Iterations will go up monotonically
    //    if FINIT < 0;
    //
    // 2) Iterations will go down monotonically
    //    if FINIT > 0.
    for (idx_t iter = 1; iter < maxit && !converge; ++iter) {
        if (orgati) {
            temp1 = dscale[1] - tau;
            temp2 = dscale[2] - tau;
        }
        else {
            temp1 = dscale[0] - tau;
            temp2 = dscale[1] - tau;
        }

        a = (temp1 + temp2) * f - temp1 * temp2 * df;
        b = temp1 * temp2 * f;
        c = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;

        // All three coefficients are normalised by the same factor inside
        // the helper (previously `c` was mistakenly overwritten with
        // b / temp here, discarding the value computed just above).
        eta = scaled_quadratic_root(a, b, c);

        // If the step does not move against the sign of f, use a Newton
        // step instead.
        if (f * eta >= zero) {
            eta = -f / df;
        }

        tau = tau + eta;
        if (tau < lbd || tau > ubd) {
            tau = (lbd + ubd) / two;
        }

        fc = zero;
        real_t err = zero;
        df = zero;
        ddf = zero;

        for (idx_t i = 0; i < 3; i++) {
            if ((dscale[i] - tau) != 0) {
                temp = one / (dscale[i] - tau);
                temp1 = zscale[i] * temp;
                temp2 = temp1 * temp;
                temp3 = temp2 * temp;
                temp4 = temp1 / dscale[i];
                fc = fc + temp4;
                err = err + abs(temp4);
                df = df + temp2;
                ddf = ddf + temp3;
            }
            else {
                // tau landed exactly on a pole: stop and accept it.
                converge = true;
                break;
            }
        }

        if (converge) {
            break;
        }

        f = finit + tau * fc;
        err = eight * (abs(finit) + abs(tau) * err) + abs(tau) * df;

        // Stop when |f| is below the rounding-error estimate or the bracket
        // has shrunk to working precision relative to tau.
        if ((abs(f) <= four * eps * err) ||
            ((ubd - lbd) <= four * eps * abs(tau))) {
            converge = true;
            break;
        }

        if (f <= zero) {
            lbd = tau;
        }
        else {
            ubd = tau;
        }
    }

    // Undo scaling
    if (scale) {
        tau = tau * sclinv;
    }

    return converge ? 0 : 1;
}


}  // namespace tlapack


#endif  // TLAPACK_LAED6_HH

utils.hpp

tlapack
Sort the numbers in D in increasing order (if ID = 'I') or in decreasing order (if ID = 'D' ).
Definition arrayTraits.hpp:15

tlapack::real_type
typename traits::real_type_traits< Types..., int >::type real_type
The common real type of the list of types.
Definition scalar_type_traits.hpp:113

tlapack::laed6
int laed6(idx_t kniter, bool &orgati, real_t rho, d_t &d, z_t &z, real_t &finit, real_t &tau)
LAED6 used by STEDC.
Definition laed6.hpp:78