/***
*pow.c - raise to a power
*
*	Copyright (c) 1991-1991, Microsoft Corporation.	All rights reserved.
*
*Purpose:
*
*Revision History:
*   8-15-91	GDP	written
*  12-20-91	GDP	support IEEE exceptions & denormals
*   1-11-92	GDP	special handling of small powers
*			special handling of u1, u2 when cancellation occurs
*   3-22-92	GDP	changed handling of int exponents, pow(0, neg)
*			added check to avoid internal overflow due to large y
*  11-09-92	GDP	adjusted special return values according to NCEG spec
*			pow(0,0) now returns 1
*  07-16-93     SRW     ALPHA Merge
*
*******************************************************************************/
#include <math.h>
#include <trans.h>
#include <float.h>

static double _reduce(double);

static double const a1[18] = {
    0.00000000000000000000e+000,   /* dummy element */
    1.00000000000000000000e+000,
    9.57603280698573646910e-001,
    9.17004043204671231754e-001,
    8.78126080186649741555e-001,
    8.40896415253714543073e-001,
    8.05245165974627154042e-001,
    7.71105412703970411793e-001,
    7.38413072969749655712e-001,
    7.07106781186547524436e-001,
    6.77127773468446364133e-001,
    6.48419777325504832961e-001,
    6.20928906036742024317e-001,
    5.94603557501360533344e-001,
    5.69394317378345826849e-001,
    5.45253866332628829604e-001,
    5.22136891213706920173e-001,
    5.00000000000000000000e-001
};

static double const a2[9] = {
    0.00000000000000000000e+000,   /* dummy element */
   -5.31259064517897172664e-017,
    1.47993596544271355242e-017,
    1.23056946577104753260e-017,
   -1.74014448683923461658e-017,
    3.84891771232354074073e-017,
    2.33103467084383453312e-017,
    4.45607092891542322377e-017,
    4.27717757045531499216e-017
};

static double const log2inv = 1.44269504088896340739e+0; //  1/log(2)
static double const K = 0.44269504088896340736e+0;

static double const p1 = 0.83333333333333211405e-1;
static double const p2 = 0.12500000000503799174e-1;
static double const p3 = 0.22321421285924258967e-2;
static double const p4 = 0.43445775672163119635e-3;

#define P(v) (((p4 * v + p3) * v + p2) * v + p1)

static double const q1 = 0.69314718055994529629e+0;
static double const q2 = 0.24022650695909537056e+0;
static double const q3 = 0.55504108664085595326e-1;
static double const q4 = 0.96181290595172416964e-2;
static double const q5 = 0.13333541313585784703e-2;
static double const q6 = 0.15400290440989764601e-3;
static double const q7 = 0.14928852680595608186e-4;

#define Q(w)   ((((((q7 * w + q6) * w + q5) * w + q4) * w + \
			      q3) * w + q2) * w + q1)


/*
 * Thresholds for over/underflow that results in an adjusted value
 * too big/small to be represented as a double. An infinity or 0
 * is delivered to the trap handler instead
 */

static _dbl const _ovfx ={SET_DBL(0x40e40000,0)}; // 16*log2(XMAX*2^IEEE_ADJ)
static _dbl const _uflx ={SET_DBL(0xc0e3fc00,0)}; // 16*log2(XMIN*2^(-IEEE_ADJ))

#define OVFX _ovfx.dbl
#define UFLX _uflx.dbl

#define INT_POW_LIMIT	128.0

static double ymax = 1e20;

static double _reduce(double x)
{
    return 0.0625 * _frnd( 16.0 * x);
}

/***
*double pow(double x, double y) - x raised to the power of y
*
*Purpose:
*   Calculate x^y
*   Algorithm from Cody & Waite
*
*Entry:
*
*Exit:
*
*Exceptions:
*
*   All 5 IEEE exceptions may occur
*
*******************************************************************************/
double pow(double x, double y)
{
    unsigned int savedcw;
    int m,mprim;
    int p,pprim;
    int i,iw1;
    int iy;
    int newexp;
    double diw1;
    double sign;
    double g,z,bigz,v,rz,result;
    double u1,u2,y1,y2,w,w1,w2;
    double savedx;

    /* save user fp control word */
    savedcw = _maskfp();
    savedx = x; 	// save original value of first argument

    /* check for infinity or NAN */
    if (IS_D_SPECIAL(x) || IS_D_SPECIAL(y)){
	if (IS_D_SNAN(x) || IS_D_SNAN(y)){
	    return _except2(FP_I,OP_POW,savedx,y,_d_snan2(x,y),savedcw);
	}
	if (IS_D_QNAN(x) || IS_D_QNAN(y)){
	    return _handle_qnan2(OP_POW,x,y,savedcw);
	}
	/* there is at least one infinite argument ... */
	if (_powhlp(x, y, &result) < 0) {
	    return _except2(FP_I,OP_POW,savedx,y,result,savedcw);
	}
	RETURN(savedcw,result);
    }

    if (y == 0.0) {
	RETURN(savedcw, 1.0);
    }

    if (x == 0.0) {
	int type;

	type = _d_inttype(y);

	if (y < 0.0) {
	    result = (type == _D_ODD ? _copysign(D_INF,x) : D_INF);
	    return _except2(FP_Z,OP_POW,savedx,y,result,savedcw);
	}
	else {
	    result = (type == _D_ODD ? x : 0.0);
	    RETURN(savedcw,result);
	}
    }

    sign = 1.0;
    if (x < 0) {
	switch (_d_inttype(y)) {
	case _D_ODD: /* y is an odd integral value */
	    sign = -1.0;
	    /* NO BREAK */
	case _D_EVEN:
	    x = -x;
	    break;
	default: /* y is not an integral value */
	    return _except2(FP_I,OP_POW,savedx,y,D_IND,savedcw);
	}
    }

    //
    // This is here in order to prevent internal overflows
    // due to a large value of y
    // The following relation holds on overflow with a scaled
    // result out of range
    // (lg stands for log base 2)
    //	    |y| * |lg(x)| > MAXEXP + IEEE_ADJUST	<=>
    //	    |y| >  2560 / |lg(x)|
    // The values of lg(x) closer to 0 are:
    //	    x					 lg(x)
    //	3fefffffffffffff    (0,99...9)	      -1.601e-16
    //	3ff0000000000000    (1.0)	       0.0
    //	3ff0000000000001    (1.00...1)	       3.203e-16
    //
    // So if |y| > 2560/1.6e-16 = 1.6e19 overflow occurs
    // We set ymax to 1e20 in order to have a safety margin
    //

    if (ABS(y) > ymax) {
	if (y < 0) {
	    y = -y;
	    //
	    // this may cause an underflow
	    // there is no problem with fp sw pollution because
	    // a FP_U exception is going to be raised anyway.
	    //
	    x = 1.0 / x;
	}
	if (x > 1.0) {
	    return _except2(FP_O | FP_P,OP_POW,savedx,y,_copysign(D_INF,sign),savedcw);
	}
	else if (x < 1.0){
	    return _except2(FP_U | FP_P,OP_POW,savedx,y,sign*0.0,savedcw);
	}
	else {
	    RETURN(savedcw, sign*1.0);
	}
    }


    /* determine m, g */
    g = _decomp(x, &m);


    /* handle small integer powers
     * for small integer powers this is faster that Cody&Waite's
     * algorithm, and yields better precision
     * Without this piece of code there was not enough precision
     * to satisfy all requirements of the 'paranoia' test.
     * We choose INT_POW_LIMIT such that (1) no overflow or underflow
     * occurs while computing bigz (g is in the range
     * [0.5, 1.0) or (1.0, 2.0] so INT_POW_LIMIT should be less than
     * approximately 10^3) and (2) no extraordinary loss of precision
     * occurs because of repeated multiplications (this practically
     * restricts the maximum INT_POW_LIMIT to 128).
     */

    if (y <= INT_POW_LIMIT &&
	_d_inttype(x) != _D_NOINT &&
	_d_inttype(y) != _D_NOINT &&
	y > 0.0 ) {

	iy = (int)y;
	mprim = m * iy;

	for (bigz=1 ; iy ; iy >>= 1, g *= g) {
	    if (iy & 0x1)
		bigz *= g;
	}

	newexp = _get_exp(bigz) + mprim;
	if (newexp > MAXEXP + IEEE_ADJUST) {
#ifdef _M_PPC
	    return _except2(FP_O | FP_P, OP_POW, savedx, y, _copysign(D_INF, sign), savedcw);
#else
	    return _except2(FP_O | FP_P, OP_POW, savedx, y, _copysign(D_INF, bigz), savedcw);
#endif
	}
	if (newexp < MINEXP - IEEE_ADJUST) {
#ifdef _M_PPC
	    return _except2(FP_U | FP_P, OP_POW, savedx, y, sign*0.0, savedcw);
#else
	    return _except2(FP_U | FP_P, OP_POW, savedx, y, bigz*0.0, savedcw);
#endif
	}

    }


    else {

	/* determine p using binary search */
	p = 1;
	if (g <= a1[9])
	    p = 9;
	if (g <= a1[p+4])
	    p += 4;
	if (g <= a1[p+2])
	    p += 2;


	/* C&W's algorithm is not very accurate when m*16-p == 1,
	 * because there is cancellation between u1 and u2.
	 * Handle this separately.
	 */
	if (ABS(m*16-p) == 1) {
	    u1 = log(x) * log2inv;
	    u2 = 0.0;
	}
	else {
	    /* determine z */
	    z = ( (g - a1[p+1]) - a2[(p+1)/2] ) / ( g + a1[p+1] );
	    z += z;


	    /* determine u2 */
	    v = z * z;
	    rz = P(v) * v * z;
	    rz += K * rz;
	    u2 = (rz + z * K) + z;

	    u1 = (m * 16 - p) * 0.0625;
	}

	/* determine w1, w2 */
	y1 = _reduce(y);
	y2 = y - y1;
	w = u2 * y + u1 * y2;
	w1 = _reduce(w);
	w2 = w - w1;
	w = w1 + u1 * y1;
	w1 = _reduce(w);
	w2 += w - w1;
	w = _reduce(w2);
	diw1 = 16 * (w1 + w); /* iw1 might overflow here, so use diw1 */
	w2 -= w;

	if (diw1 > OVFX) {
	    return _except2(FP_O | FP_P,OP_POW,savedx,y,_copysign(D_INF,sign),savedcw);
	}
	if (diw1 < UFLX) {
	    return _except2(FP_U | FP_P,OP_POW,savedx,y,sign*0.0,savedcw);
	}

	iw1 = (int) diw1;	 /* now it is safe to cast to int */


	/* make sure w2 <= 0 */
	if (w2 > 0) {
	    iw1 += 1;
	    w2 -= 0.0625;
	}

	/* determine mprim, pprim */
	i = iw1 < 0 ? 0 : 1;
	mprim = iw1 / 16 + i;
	pprim = 16 * mprim - iw1;

	/* determine 2^w2 */
	bigz = Q(w2) * w2;

	/* determine  final result */
	bigz = a1[pprim + 1] + a1[pprim + 1] * bigz;
	newexp = _get_exp(bigz) + mprim;
    }


    if (newexp > MAXEXP) {
	result = sign * _set_exp(bigz, newexp - IEEE_ADJUST);
	return _except2(FP_O | FP_P, OP_POW, savedx, y, result, savedcw);
    }
    if (newexp < MINEXP) {
	result = sign * _set_exp(bigz, newexp + IEEE_ADJUST);
	return _except2(FP_U | FP_P, OP_POW, savedx, y, result, savedcw);
    }

    result = sign * _set_exp(bigz, newexp);
    RETURN_INEXACT2(OP_POW, savedx, y, result, savedcw);

}