/*** *pow.c - raise to a power * * Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. * *Purpose: * *Revision History: * 8-15-91 GDP written * 12-20-91 GDP support IEEE exceptions & denormals * 1-11-92 GDP special handling of small powers * special handling of u1, u2 when cancellation occurs * 3-22-92 GDP changed handling of int exponents, pow(0, neg) * added check to avoid internal overflow due to large y * 11-09-92 GDP adjusted special return values according to NCEG spec * pow(0,0) now returns 1 * 07-16-93 SRW ALPHA Merge * *******************************************************************************/ #include #include #include static double _reduce(double); static double const a1[18] = { 0.00000000000000000000e+000, /* dummy element */ 1.00000000000000000000e+000, 9.57603280698573646910e-001, 9.17004043204671231754e-001, 8.78126080186649741555e-001, 8.40896415253714543073e-001, 8.05245165974627154042e-001, 7.71105412703970411793e-001, 7.38413072969749655712e-001, 7.07106781186547524436e-001, 6.77127773468446364133e-001, 6.48419777325504832961e-001, 6.20928906036742024317e-001, 5.94603557501360533344e-001, 5.69394317378345826849e-001, 5.45253866332628829604e-001, 5.22136891213706920173e-001, 5.00000000000000000000e-001 }; static double const a2[9] = { 0.00000000000000000000e+000, /* dummy element */ -5.31259064517897172664e-017, 1.47993596544271355242e-017, 1.23056946577104753260e-017, -1.74014448683923461658e-017, 3.84891771232354074073e-017, 2.33103467084383453312e-017, 4.45607092891542322377e-017, 4.27717757045531499216e-017 }; static double const log2inv = 1.44269504088896340739e+0; // 1/log(2) static double const K = 0.44269504088896340736e+0; static double const p1 = 0.83333333333333211405e-1; static double const p2 = 0.12500000000503799174e-1; static double const p3 = 0.22321421285924258967e-2; static double const p4 = 0.43445775672163119635e-3; #define P(v) (((p4 * v + p3) * v + p2) * v + p1) static double const q1 = 0.69314718055994529629e+0; static double const q2 = 0.24022650695909537056e+0; static double const q3 = 0.55504108664085595326e-1; static double const q4 = 0.96181290595172416964e-2; static double const q5 = 0.13333541313585784703e-2; static double const q6 = 0.15400290440989764601e-3; static double const q7 = 0.14928852680595608186e-4; #define Q(w) ((((((q7 * w + q6) * w + q5) * w + q4) * w + \ q3) * w + q2) * w + q1) /* * Thresholds for over/underflow that results in an adjusted value * too big/small to be represented as a double. An infinity or 0 * is delivered to the trap handler instead */ static _dbl const _ovfx ={SET_DBL(0x40e40000,0)}; // 16*log2(XMAX*2^IEEE_ADJ) static _dbl const _uflx ={SET_DBL(0xc0e3fc00,0)}; // 16*log2(XMIN*2^(-IEEE_ADJ)) #define OVFX _ovfx.dbl #define UFLX _uflx.dbl #define INT_POW_LIMIT 128.0 static double ymax = 1e20; static double _reduce(double x) { return 0.0625 * _frnd( 16.0 * x); } /*** *double pow(double x, double y) - x raised to the power of y * *Purpose: * Calculate x^y * Algorithm from Cody & Waite * *Entry: * *Exit: * *Exceptions: * * All 5 IEEE exceptions may occur * *******************************************************************************/ double pow(double x, double y) { unsigned int savedcw; int m,mprim; int p,pprim; int i,iw1; int iy; int newexp; double diw1; double sign; double g,z,bigz,v,rz,result; double u1,u2,y1,y2,w,w1,w2; double savedx; /* save user fp control word */ savedcw = _maskfp(); savedx = x; // save original value of first argument /* check for infinity or NAN */ if (IS_D_SPECIAL(x) || IS_D_SPECIAL(y)){ if (IS_D_SNAN(x) || IS_D_SNAN(y)){ return _except2(FP_I,OP_POW,savedx,y,_d_snan2(x,y),savedcw); } if (IS_D_QNAN(x) || IS_D_QNAN(y)){ return _handle_qnan2(OP_POW,x,y,savedcw); } /* there is at least one infinite argument ... */ if (_powhlp(x, y, &result) < 0) { return _except2(FP_I,OP_POW,savedx,y,result,savedcw); } RETURN(savedcw,result); } if (y == 0.0) { RETURN(savedcw, 1.0); } if (x == 0.0) { int type; type = _d_inttype(y); if (y < 0.0) { result = (type == _D_ODD ? _copysign(D_INF,x) : D_INF); return _except2(FP_Z,OP_POW,savedx,y,result,savedcw); } else { result = (type == _D_ODD ? x : 0.0); RETURN(savedcw,result); } } sign = 1.0; if (x < 0) { switch (_d_inttype(y)) { case _D_ODD: /* y is an odd integral value */ sign = -1.0; /* NO BREAK */ case _D_EVEN: x = -x; break; default: /* y is not an integral value */ return _except2(FP_I,OP_POW,savedx,y,D_IND,savedcw); } } // // This is here in order to prevent internal overflows // due to a large value of y // The following relation holds on overflow with a scaled // result out of range // (lg stands for log base 2) // |y| * |lg(x)| > MAXEXP + IEEE_ADJUST <=> // |y| > 2560 / |lg(x)| // The values of lg(x) closer to 0 are: // x lg(x) // 3fefffffffffffff (0,99...9) -1.601e-16 // 3ff0000000000000 (1.0) 0.0 // 3ff0000000000001 (1.00...1) 3.203e-16 // // So if |y| > 2560/1.6e-16 = 1.6e19 overflow occurs // We set ymax to 1e20 in order to have a safety margin // if (ABS(y) > ymax) { if (y < 0) { y = -y; // // this may cause an underflow // there is no problem with fp sw pollution because // a FP_U exception is going to be raised anyway. // x = 1.0 / x; } if (x > 1.0) { return _except2(FP_O | FP_P,OP_POW,savedx,y,_copysign(D_INF,sign),savedcw); } else if (x < 1.0){ return _except2(FP_U | FP_P,OP_POW,savedx,y,sign*0.0,savedcw); } else { RETURN(savedcw, sign*1.0); } } /* determine m, g */ g = _decomp(x, &m); /* handle small integer powers * for small integer powers this is faster that Cody&Waite's * algorithm, and yields better precision * Without this piece of code there was not enough precision * to satisfy all requirements of the 'paranoia' test. * We choose INT_POW_LIMIT such that (1) no overflow or underflow * occurs while computing bigz (g is in the range * [0.5, 1.0) or (1.0, 2.0] so INT_POW_LIMIT should be less than * approximately 10^3) and (2) no extraordinary loss of precision * occurs because of repeated multiplications (this practically * restricts the maximum INT_POW_LIMIT to 128). */ if (y <= INT_POW_LIMIT && _d_inttype(x) != _D_NOINT && _d_inttype(y) != _D_NOINT && y > 0.0 ) { iy = (int)y; mprim = m * iy; for (bigz=1 ; iy ; iy >>= 1, g *= g) { if (iy & 0x1) bigz *= g; } newexp = _get_exp(bigz) + mprim; if (newexp > MAXEXP + IEEE_ADJUST) { #ifdef _M_PPC return _except2(FP_O | FP_P, OP_POW, savedx, y, _copysign(D_INF, sign), savedcw); #else return _except2(FP_O | FP_P, OP_POW, savedx, y, _copysign(D_INF, bigz), savedcw); #endif } if (newexp < MINEXP - IEEE_ADJUST) { #ifdef _M_PPC return _except2(FP_U | FP_P, OP_POW, savedx, y, sign*0.0, savedcw); #else return _except2(FP_U | FP_P, OP_POW, savedx, y, bigz*0.0, savedcw); #endif } } else { /* determine p using binary search */ p = 1; if (g <= a1[9]) p = 9; if (g <= a1[p+4]) p += 4; if (g <= a1[p+2]) p += 2; /* C&W's algorithm is not very accurate when m*16-p == 1, * because there is cancellation between u1 and u2. * Handle this separately. */ if (ABS(m*16-p) == 1) { u1 = log(x) * log2inv; u2 = 0.0; } else { /* determine z */ z = ( (g - a1[p+1]) - a2[(p+1)/2] ) / ( g + a1[p+1] ); z += z; /* determine u2 */ v = z * z; rz = P(v) * v * z; rz += K * rz; u2 = (rz + z * K) + z; u1 = (m * 16 - p) * 0.0625; } /* determine w1, w2 */ y1 = _reduce(y); y2 = y - y1; w = u2 * y + u1 * y2; w1 = _reduce(w); w2 = w - w1; w = w1 + u1 * y1; w1 = _reduce(w); w2 += w - w1; w = _reduce(w2); diw1 = 16 * (w1 + w); /* iw1 might overflow here, so use diw1 */ w2 -= w; if (diw1 > OVFX) { return _except2(FP_O | FP_P,OP_POW,savedx,y,_copysign(D_INF,sign),savedcw); } if (diw1 < UFLX) { return _except2(FP_U | FP_P,OP_POW,savedx,y,sign*0.0,savedcw); } iw1 = (int) diw1; /* now it is safe to cast to int */ /* make sure w2 <= 0 */ if (w2 > 0) { iw1 += 1; w2 -= 0.0625; } /* determine mprim, pprim */ i = iw1 < 0 ? 0 : 1; mprim = iw1 / 16 + i; pprim = 16 * mprim - iw1; /* determine 2^w2 */ bigz = Q(w2) * w2; /* determine final result */ bigz = a1[pprim + 1] + a1[pprim + 1] * bigz; newexp = _get_exp(bigz) + mprim; } if (newexp > MAXEXP) { result = sign * _set_exp(bigz, newexp - IEEE_ADJUST); return _except2(FP_O | FP_P, OP_POW, savedx, y, result, savedcw); } if (newexp < MINEXP) { result = sign * _set_exp(bigz, newexp + IEEE_ADJUST); return _except2(FP_U | FP_P, OP_POW, savedx, y, result, savedcw); } result = sign * _set_exp(bigz, newexp); RETURN_INEXACT2(OP_POW, savedx, y, result, savedcw); }