diff --git a/libmath/desk/lib/math.hoon b/libmath/desk/lib/math.hoon index 5676f62..c979042 100644 --- a/libmath/desk/lib/math.hoon +++ b/libmath/desk/lib/math.hoon @@ -464,27 +464,42 @@ :: Source ++ exp |= x=@rs ^- @rs - :: filter out non-finite arguments - ?: =(x 0x0) .1 - :: check infinities - ?: =(x 0x7f80.0000) `@rs`0x7f80.0000 :: exp(+inf) -> inf - ?: =(x 0xff80.0000) .0.0 :: exp(-inf) -> 0 - :: check NaN - ?. (^gte (dis 0x7fc0.0000 x) 0) `@rs`0x7fc0.0000 :: exp(NaN) -> NaN - :: check overflow to infinity - =/ o-threshold `@rs`0x42b0.c0a8 :: 88.72283905206835, value above which exp(x) overflows - ?: (gth x o-threshold) (mul huge huge) - :: check underflow to zero - =/ u-threshold `@rs`0xc2b0.c0a8 :: -88.72283905206835, value below which exp(x) underflows - ?: (lth x u-threshold) (mul tiny tiny) - :: otherwise, use Taylor series - =/ p .1 - =/ po .-1 - =/ i .1 - |- ^- @rs - ?: (lth (abs (sub po p)) rtol) - p - $(i (add i .1), p (add p (div (pow-n x i) (factorial i))), po p) + :: Chebyshev: x = k*ln2 + r (Cody-Waite reduction); exp(x) = 2^k * P(r), + :: P a degree-6 minimax polynomial faithful to <=1 ULP. Internals are + :: forced to round-nearest-even: a correctly-rounded transcendental does + :: not take a rounding-mode axis (and the SoftFloat jet will match this). + :: +scale2 is a correctly-rounded ldexp that stays exact across the normal + :: range and rounds exactly once into the overflow/subnormal tails. + =/ pow2 |=(j=@s `@rs`(lsh [0 23] (abs:si (sum:si j --127)))) + =/ scale2 + |= [p=@rs k=@s] ^- @rs + ?: (syn:si (dif:si k --128)) :: k>127: (p*2^127)*2^(k-127) + (~(mul ^rs %n) (~(mul ^rs %n) p (pow2 --127)) (pow2 (dif:si k --127))) + ?: !(syn:si (sum:si k --126)) :: k<-126: (p*2^(k+24))*2^-24 + (~(mul ^rs %n) (~(mul ^rs %n) p (pow2 (sum:si k --24))) (pow2 -24)) + (~(mul ^rs %n) p (pow2 k)) + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 :: exp(NaN) -> NaN + ?: =(x `@rs`0x7f80.0000) `@rs`0x7f80.0000 :: exp(+inf) -> inf + ?: =(x `@rs`0xff80.0000) `@rs`0x0 :: exp(-inf) -> 0 + =/ log2e `@rs`0x3fb8.aa3b + =/ ln2hi `@rs`0x3f31.7200 + =/ ln2lo `@rs`0x35bf.be8e + =/ k=@s (need (~(toi ^rs %n) (~(mul ^rs %n) x log2e))) + ?: (syn:si (dif:si k --129)) `@rs`0x7f80.0000 :: overflow -> inf + ?: !(syn:si (sum:si k --150)) `@rs`0x0 :: underflow -> 0 + =/ ka (~(sun ^rs %n) (abs:si k)) + =/ kf ?:((syn:si k) ka (~(sub ^rs %n) .0 ka)) :: k as @rs + =/ r + %- ~(sub ^rs %n) + :- (~(sub ^rs %n) x (~(mul ^rs %n) kf ln2hi)) + (~(mul ^rs %n) kf ln2lo) + =/ cs=(list @rs) + :~ `@rs`0x3f80.0000 `@rs`0x3f80.0000 `@rs`0x3f00.0000 + `@rs`0x3e2a.aa02 `@rs`0x3d2a.aa56 `@rs`0x3c09.37d3 + `@rs`0x3ab6.ba99 + == + =/ p (roll (flop cs) |=([c=@rs acc=@rs] (~(add ^rs %n) (~(mul ^rs %n) acc r) c))) + (scale2 p k) :: +sin: @rs -> @rs :: :: Returns the sine of a floating-point atom. @@ -498,25 +513,14 @@ :: Source ++ sin |= x=@rs ^- @rs - :: filter out non-finite arguments - :: check infinities - ?: =(x 0x7f80.0000) `@rs`0x7fc0.0000 :: sin(+inf) -> NaN - ?: =(x 0xff80.0000) `@rs`0x7fc0.0000 :: sin(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7fc0.0000 x) 0) `@rs`0x7fc0.0000 :: sin(NaN) -> NaN - :: map into domain - =. x (mod x tau) - :: otherwise, use Taylor series - =/ p x - =/ po .-2 - =/ i 1 - =/ term x - |- ^- @rs - ?. (gth (abs term) rtol) - p - =/ i2 (add (sun i) (sun i)) - =. term (mul (neg term) (div (mul x x) (mul i2 (add i2 .1)))) - $(i +(i), p (add p term), po p) + :: Reduce x = q*(pi/2) + (rhi+rlo) with a 3-part pi/2 (f32 needs the bits), + :: then fdlibm sin/cos kernels picked by q&3. Faithful to <=1 ULP for + :: |x| <~ 500. Round-nearest-even internally (the SoftFloat jet matches). + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 :: NaN + ?: |(=(x `@rs`0x7f80.0000) =(x `@rs`0xff80.0000)) `@rs`0x7fc0.0000 :: +-inf + ?: |(=(x `@rs`0x0) =(x `@rs`0x8000.0000)) x :: +-0 -> +-0 + %- trig-fin:rs-trig + [%.y `@rs`(dis x 0x7fff.ffff) (rsh [0 31] x)] :: +cos: @rs -> @rs :: :: Returns the cosine of a floating-point atom. @@ -530,25 +534,52 @@ :: Source ++ cos |= x=@rs ^- @rs - :: filter out non-finite arguments - :: check infinities - ?: =(x 0x7f80.0000) `@rs`0x7fc0.0000 :: sin(+inf) -> NaN - ?: =(x 0xff80.0000) `@rs`0x7fc0.0000 :: sin(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7fc0.0000 x) 0) `@rs`0x7fc0.0000 :: sin(NaN) -> NaN - :: map into domain - =. x (mod x tau) - :: otherwise, use Taylor series - =/ p .1 - =/ po .-2 - =/ i 1 - =/ term .1 - |- ^- @rs - ?. (gth (abs term) rtol) - p - =/ i2 (add (sun i) (sun i)) - =. term (mul (neg term) (div (mul x x) (mul i2 (sub i2 .1)))) - $(i +(i), p (add p term), po p) + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 + ?: |(=(x `@rs`0x7f80.0000) =(x `@rs`0xff80.0000)) `@rs`0x7fc0.0000 + %- trig-fin:rs-trig + [%.n `@rs`(dis x 0x7fff.ffff) 0] + :: +rs-trig: shared sin/cos engine for the @rs door (see +sin / +cos). + ++ rs-trig + |% + ++ sc ^-((list @rs) :~(`@rs`0xbe2a.aaab `@rs`0x3c08.8889 `@rs`0xb950.0d01 `@rs`0x3638.ef1d `@rs`0xb2d7.322b)) + ++ cc ^-((list @rs) :~(`@rs`0x3d2a.aaab `@rs`0xbab6.0b61 `@rs`0x37d0.0d01 `@rs`0xb493.f27e `@rs`0x310f.76c7)) + ++ neg |=(a=@rs ^-(@rs (~(sub ^rs %n) `@rs`0x0 a))) + ++ ksin + |= [xx=@rs yy=@rs] ^- @rs + =/ z (~(mul ^rs %n) xx xx) + =/ r (roll (flop (tail sc)) |=([c=@rs a=@rs] (~(add ^rs %n) (~(mul ^rs %n) a z) c))) + =/ v (~(mul ^rs %n) z xx) + =/ aa (~(sub ^rs %n) (~(mul ^rs %n) `@rs`0x3f00.0000 yy) (~(mul ^rs %n) v r)) + =/ bb (~(sub ^rs %n) (~(mul ^rs %n) z aa) yy) + =/ dd (~(sub ^rs %n) bb (~(mul ^rs %n) v (head sc))) + (~(sub ^rs %n) xx dd) + ++ kcos + |= [xx=@rs yy=@rs] ^- @rs + =/ z (~(mul ^rs %n) xx xx) + =/ rc (roll (flop cc) |=([c=@rs a=@rs] (~(add ^rs %n) (~(mul ^rs %n) a z) c))) + =/ hz (~(mul ^rs %n) `@rs`0x3f00.0000 z) + =/ w2 (~(sub ^rs %n) `@rs`0x3f80.0000 hz) + =/ aa (~(sub ^rs %n) (~(sub ^rs %n) `@rs`0x3f80.0000 w2) hz) + =/ bb (~(sub ^rs %n) (~(mul ^rs %n) (~(mul ^rs %n) z z) rc) (~(mul ^rs %n) xx yy)) + (~(add ^rs %n) w2 (~(add ^rs %n) aa bb)) + :: +trig-fin: [is-sin? |x| sign-bit] -> sin x (is-sin?) or cos x + ++ trig-fin + |= [s=? ax=@rs sb=@] ^- @rs + =/ q (need (~(toi ^rs %n) (~(mul ^rs %n) ax `@rs`0x3f22.f983))) + =/ qf (~(sun ^rs %n) (abs:si q)) + =/ r1 (~(sub ^rs %n) ax (~(mul ^rs %n) qf `@rs`0x3fc9.0000)) + =/ r2 (~(sub ^rs %n) r1 (~(mul ^rs %n) qf `@rs`0x39fd.a000)) + =/ w (~(mul ^rs %n) qf `@rs`0x33a2.2169) + =/ rhi (~(sub ^rs %n) r2 w) + =/ rlo (~(sub ^rs %n) (~(sub ^rs %n) r2 rhi) w) + =/ m (dis (abs:si q) 3) + =/ ks (ksin rhi rlo) + =/ kc (kcos rhi rlo) + ?: s + =/ v ?:(=(m 0) ks ?:(=(m 1) kc ?:(=(m 2) (neg ks) (neg kc)))) + ?:(=(sb 1) (neg v) v) + ?:(=(m 0) kc ?:(=(m 1) (neg ks) ?:(=(m 2) (neg kc) ks))) + -- :: +tan: @rs -> @rs :: :: Returns the tangent of a floating-point atom. @@ -575,12 +606,9 @@ :: .0.7753969 :: ++ asin + :: fdlibm rational kernel; see +rs-ainv. Faithful to <=1 ULP; |x|>1 -> NaN. |= x=@rs ^- @rs - ?. (gte (abs x) .1) - (atan (div x (sqt (abs (sub .1 (mul x x)))))) - ?: =(.1 x) ^~((mul pi .0.5)) - ?: =(.-1 x) ^~((mul pi .-0.5)) - ~|([%asin-out-of-bounds x] !!) + (asn:rs-ainv x) :: +acos: @rs -> @rs :: :: Returns the inverse cosine of a floating-point atom. @@ -594,12 +622,66 @@ :: ++ acos |= x=@rs ^- @rs - ?. (gte (abs x) .1) - ?: =(.0 x) ^~((mul pi .0.5)) - (atan (div (sqt (abs (sub .1 (mul x x)))) x)) - ?: =(.1 x) .0 - ?: =(.-1 x) pi - ~|([%acos-out-of-bounds x] !!) + (acs:rs-ainv x) + :: +rs-ainv: shared asin/acos engine for the @rs door (rational P/Q kernel), + :: see +asin / +acos. + ++ rs-ainv + |% + ++ rr + |= t=@rs ^- @rs + =/ ps=(list @rs) :~(`@rs`0x3e2a.aa75 `@rs`0xbd2f.13ba `@rs`0xbc0d.d36b) + =/ p (~(mul ^rs %n) t (roll (flop ps) |=([c=@rs a=@rs] (~(add ^rs %n) (~(mul ^rs %n) a t) c)))) + =/ q (~(add ^rs %n) `@rs`0x3f80.0000 (~(mul ^rs %n) t `@rs`0xbf34.e5ae)) + (~(div ^rs %n) p q) + ++ asn + |= x=@rs ^- @rs + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 + =/ sgn (rsh [0 31] x) + =/ ax `@rs`(dis x 0x7fff.ffff) + ?: (~(gth ^rs %n) ax `@rs`0x3f80.0000) `@rs`0x7fc0.0000 + ?: =(ax `@rs`0x3f80.0000) + (~(add ^rs %n) (~(mul ^rs %n) x `@rs`0x3fc9.0fdb) (~(mul ^rs %n) x `@rs`0xb33b.bd2e)) + ?: (~(lth ^rs %n) ax `@rs`0x3f00.0000) + ?: (~(lth ^rs %n) ax `@rs`0x3980.0000) x + (~(add ^rs %n) x (~(mul ^rs %n) x (rr (~(mul ^rs %n) x x)))) + =/ w (~(sub ^rs %n) `@rs`0x3f80.0000 ax) + =/ t (~(mul ^rs %n) w `@rs`0x3f00.0000) + =/ r (rr t) + =/ s (sqt t) + ?: (~(gte ^rs %n) ax `@rs`0x3f79.999a) + =/ res (~(sub ^rs %n) `@rs`0x3fc9.0fdb (~(sub ^rs %n) (~(mul ^rs %n) `@rs`0x4000.0000 (~(add ^rs %n) s (~(mul ^rs %n) s r))) `@rs`0xb33b.bd2e)) + ?:(=(sgn 1) (~(sub ^rs %n) `@rs`0x0 res) res) + =/ df `@rs`(dis s 0xffff.f000) + =/ c (~(div ^rs %n) (~(sub ^rs %n) t (~(mul ^rs %n) df df)) (~(add ^rs %n) s df)) + =/ p2 (~(sub ^rs %n) (~(mul ^rs %n) `@rs`0x4000.0000 (~(mul ^rs %n) s r)) (~(sub ^rs %n) `@rs`0xb33b.bd2e (~(mul ^rs %n) `@rs`0x4000.0000 c))) + =/ q2 (~(sub ^rs %n) `@rs`0x3f49.0fdb (~(mul ^rs %n) `@rs`0x4000.0000 df)) + =/ res (~(sub ^rs %n) `@rs`0x3f49.0fdb (~(sub ^rs %n) p2 q2)) + ?:(=(sgn 1) (~(sub ^rs %n) `@rs`0x0 res) res) + ++ acs + |= x=@rs ^- @rs + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 + =/ neg (rsh [0 31] x) + =/ ax `@rs`(dis x 0x7fff.ffff) + ?: (~(gth ^rs %n) ax `@rs`0x3f80.0000) `@rs`0x7fc0.0000 + ?: =(ax `@rs`0x3f80.0000) + ?: =(neg 0) `@rs`0x0 + (~(add ^rs %n) `@rs`0x4049.0fdb (~(mul ^rs %n) `@rs`0x4000.0000 `@rs`0xb33b.bd2e)) + ?: (~(lth ^rs %n) ax `@rs`0x3f00.0000) + ?: (~(lth ^rs %n) ax `@rs`0x3280.0000) `@rs`0x3fc9.0fdb + =/ z (~(mul ^rs %n) x x) + =/ r (rr z) + (~(sub ^rs %n) `@rs`0x3fc9.0fdb (~(sub ^rs %n) x (~(sub ^rs %n) `@rs`0xb33b.bd2e (~(mul ^rs %n) x r)))) + ?: =(neg 1) + =/ z (~(mul ^rs %n) (~(add ^rs %n) `@rs`0x3f80.0000 x) `@rs`0x3f00.0000) + =/ s (sqt z) + =/ r (rr z) + =/ w (~(sub ^rs %n) (~(mul ^rs %n) r s) `@rs`0xb33b.bd2e) + (~(sub ^rs %n) `@rs`0x4049.0fdb (~(mul ^rs %n) `@rs`0x4000.0000 (~(add ^rs %n) s w))) + =/ z (~(mul ^rs %n) (~(sub ^rs %n) `@rs`0x3f80.0000 x) `@rs`0x3f00.0000) + =/ s (sqt z) + =/ r (rr z) + (~(mul ^rs %n) `@rs`0x4000.0000 (~(add ^rs %n) s (~(mul ^rs %n) s r))) + -- :: +atan: @rs -> @rs :: :: Returns the inverse tangent of a floating-point atom. @@ -612,15 +694,54 @@ :: .1.2626364 :: ++ atan + :: fdlibm breakpoint reduction + minimax poly; odd. Round-nearest-even + :: internally (the SoftFloat jet matches). |= x=@rs ^- @rs - =/ a (pow (add .1 (mul x x)) .-0.5) - =/ b .1 - |- - ?. (gth (abs (sub a b)) rtol) - (div x (mul (pow (add .1 (mul x x)) .0.5) b)) - =/ ai (mul .0.5 (add a b)) - =/ bi (sqt (mul ai b)) - $(a ai, b bi) + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 :: NaN + ?: =(x `@rs`0x7f80.0000) `@rs`0x3fc9.0fdb :: +inf -> pi/2 + ?: =(x `@rs`0xff80.0000) `@rs`0xbfc9.0fdb :: -inf -> -pi/2 + ?: |(=(x `@rs`0x0) =(x `@rs`0x8000.0000)) x :: +-0 -> +-0 + =/ neg (rsh [0 31] x) + =/ r (ker:rs-atan `@rs`(dis x 0x7fff.ffff)) + ?:(=(neg 1) (~(sub ^rs %n) `@rs`0x0 r) r) + :: +rs-atan: atan kernel for the @rs door (reduction + poly), see +atan. + ++ rs-atan + |% + ++ at + ^- (list @rs) + :~ `@rs`0x3eaa.aaa9 `@rs`0xbe4c.ca98 `@rs`0x3e11.f50d + `@rs`0xbdda.1247 `@rs`0x3d7c.ac25 + == + ++ atred + |= ax=@rs ^- [xr=@rs hi=@rs lo=@rs dir=?] + =/ one `@rs`0x3f80.0000 + =/ two `@rs`0x4000.0000 + =/ ohf `@rs`0x3fc0.0000 + ?: (~(lth ^rs %n) ax `@rs`0x3ee0.0000) + [ax `@rs`0x0 `@rs`0x0 %.y] + ?: (~(lth ^rs %n) ax `@rs`0x3f30.0000) + :* (~(div ^rs %n) (~(sub ^rs %n) (~(add ^rs %n) ax ax) one) (~(add ^rs %n) two ax)) + `@rs`0x3eed.6338 `@rs`0x31ac.376a %.n + == + ?: (~(lth ^rs %n) ax `@rs`0x3f98.0000) + :* (~(div ^rs %n) (~(sub ^rs %n) ax one) (~(add ^rs %n) ax one)) + `@rs`0x3f49.0fdb `@rs`0xb2bb.bd2e %.n + == + ?: (~(lth ^rs %n) ax `@rs`0x401c.0000) + :* (~(div ^rs %n) (~(sub ^rs %n) ax ohf) (~(add ^rs %n) one (~(mul ^rs %n) ohf ax))) + `@rs`0x3f7b.985f `@rs`0xb2d7.e096 %.n + == + :* (~(div ^rs %n) `@rs`0xbf80.0000 ax) + `@rs`0x3fc9.0fdb `@rs`0xb33b.bd2e %.n + == + ++ ker + |= ax=@rs ^- @rs + =/ q (atred ax) + =/ z (~(mul ^rs %n) xr.q xr.q) + =/ s (~(mul ^rs %n) z (roll (flop at) |=([c=@rs a=@rs] (~(add ^rs %n) (~(mul ^rs %n) a z) c)))) + ?: dir.q (~(sub ^rs %n) xr.q (~(mul ^rs %n) xr.q s)) + (~(sub ^rs %n) hi.q (~(sub ^rs %n) (~(sub ^rs %n) (~(mul ^rs %n) xr.q s) lo.q) xr.q)) + -- :: +atan2: [@rs @rs] -> @rs :: :: Returns the inverse tangent of a floating-point coordinate. @@ -683,25 +804,41 @@ :: .0.9999994 :: Source ++ log - |= z=@rs ^- @rs - :: filter out non-finite arguments - :: check infinities - ?: =(z 0x7f80.0000) `@rs`0x7f80.0000 :: log(+inf) -> inf - ?: =(z 0xff80.0000) `@rs`0x7fc0.0000 :: log(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7fc0.0000 z) 0) `@rs`0x7fc0.0000 :: exp(NaN) -> NaN - :: otherwise, use Taylor series - =/ p .0 - =/ po .-1 - =/ i .0 - |- ^- @rs - ?: (lth (abs (sub po p)) rtol) - (mul (div (mul .2 (sub z .1)) (add z .1)) p) - =/ term1 (div .1 (add .1 (mul .2 i))) - =/ term2 (mul (sub z .1) (sub z .1)) - =/ term3 (mul (add z .1) (add z .1)) - =/ term (mul term1 (pow-n (div term2 term3) i)) - $(i (add i .1), p (add p term), po p) + |= x=@rs ^- @rs + :: Reduce x = 2^e * m with m in [sqrt(1/2), sqrt(2)); then + :: log(x) = e*ln2 + log(1+f), f = m-1, s = f/(2+f), + :: log(1+f) = f - s*(f - 2z*P2(z)), z = s*s, P2 the atanh series + :: 1/3 + z/5 + z^2/7 + ... Faithful to <=1 ULP; round-nearest-even + :: internally (the SoftFloat jet will match this bit-for-bit). + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 :: log(NaN) -> NaN + ?: =(x `@rs`0x7f80.0000) `@rs`0x7f80.0000 :: log(+inf) -> inf + ?: |(=(x `@rs`0x0) =(x `@rs`0x8000.0000)) `@rs`0xff80.0000 :: log(+-0) -> -inf + ?: =(1 (rsh [0 31] x)) `@rs`0x7fc0.0000 :: log(x<0) -> NaN + =/ sub =(0 (dis 0xff (rsh [0 23] x))) :: subnormal? + =/ xx ?:(sub (~(mul ^rs %n) x `@rs`0x4b80.0000) x) :: *2^24 + =/ ae ?:(sub -24 --0) + =/ b `@`xx + =/ ef (dif:si (new:si %.y (dis 0xff (rsh [0 23] b))) --127) + =/ m `@rs`(con (dis b 0x7f.ffff) 0x3f80.0000) + =/ big (~(gte ^rs %n) m `@rs`0x3fb5.04f3) :: m >= sqrt(2) + =? m big (~(mul ^rs %n) m `@rs`0x3f00.0000) :: m * 0.5 + =? ef big (sum:si ef --1) + =. ef (sum:si ef ae) + =/ f (~(sub ^rs %n) m `@rs`0x3f80.0000) + =/ s (~(div ^rs %n) f (~(add ^rs %n) m `@rs`0x3f80.0000)) + =/ z (~(mul ^rs %n) s s) + =/ cs=(list @rs) + :~ `@rs`0x3eaa.aaab `@rs`0x3e4c.cccd `@rs`0x3e12.4925 + `@rs`0x3de3.8e39 `@rs`0x3dba.2e8c + == + =/ p2 (roll (flop cs) |=([c=@rs acc=@rs] (~(add ^rs %n) (~(mul ^rs %n) acc z) c))) + =/ r (~(mul ^rs %n) (~(add ^rs %n) z z) p2) + =/ l1 (~(sub ^rs %n) f (~(mul ^rs %n) s (~(sub ^rs %n) f r))) + =/ efa (~(sun ^rs %n) (abs:si ef)) + =/ ef-f ?:((syn:si ef) efa (~(sub ^rs %n) .0 efa)) :: e as @rs + =/ hi (~(mul ^rs %n) ef-f `@rs`0x3f31.7200) :: e*ln2hi + =/ lo (~(mul ^rs %n) ef-f `@rs`0x35bf.be8e) :: e*ln2lo + (~(add ^rs %n) hi (~(add ^rs %n) l1 lo)) :: +log-10: @rs -> @rs :: :: Returns the base-10 logarithm of a floating-point atom. @@ -716,8 +853,15 @@ :: .inf :: Source ++ log-10 - |= z=@rs ^- @rs - (div (log z) log10) + :: e*log10(2) + log(m)/ln10, reusing +lr so the integer part is added with + :: no division rounding (more accurate than log(x)/ln10). + |= x=@rs ^- @rs + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 + ?: =(x `@rs`0x7f80.0000) `@rs`0x7f80.0000 + ?: |(=(x `@rs`0x0) =(x `@rs`0x8000.0000)) `@rs`0xff80.0000 + ?: =(1 (rsh [0 31] x)) `@rs`0x7fc0.0000 + =/ el (lr x) + (~(add ^rs %n) (~(mul ^rs %n) ef.el `@rs`0x3e9a.209b) (~(mul ^rs %n) lm.el `@rs`0x3ede.5bd9)) :: +log-2: @rs -> @rs :: :: Returns the base-2 logarithm of a floating-point atom. @@ -726,12 +870,42 @@ :: .-3.321928 :: > (log-2 .2) :: .1.5849625 - :: > (~(log-2 rs [%z .1e-8]) .2) - :: .1.5849633 :: Source ++ log-2 - |= z=@rs ^- @rs - (div (log z) log2) + :: e + log(m)/ln2 (integer part exact); see +lr. + |= x=@rs ^- @rs + ?: !(~(equ ^rs %n) x x) `@rs`0x7fc0.0000 + ?: =(x `@rs`0x7f80.0000) `@rs`0x7f80.0000 + ?: |(=(x `@rs`0x0) =(x `@rs`0x8000.0000)) `@rs`0xff80.0000 + ?: =(1 (rsh [0 31] x)) `@rs`0x7fc0.0000 + =/ el (lr x) + (~(add ^rs %n) ef.el (~(mul ^rs %n) lm.el `@rs`0x3fb8.aa3b)) + :: +lr: log reduction for finite positive x -> [e (as @rs), log(mantissa)]. + ++ lr + |= x=@rs ^- [ef=@rs lm=@rs] + =/ sub =(0 (dis 0xff (rsh [0 23] x))) + =/ xx ?:(sub (~(mul ^rs %n) x `@rs`0x4b80.0000) x) + =/ ae ?:(sub -24 --0) + =/ b `@`xx + =/ e (dif:si (new:si %.y (dis 0xff (rsh [0 23] b))) --127) + =/ m `@rs`(con (dis b 0x7f.ffff) 0x3f80.0000) + =/ big (~(gte ^rs %n) m `@rs`0x3fb5.04f3) + =? m big (~(mul ^rs %n) m `@rs`0x3f00.0000) + =? e big (sum:si e --1) + =. e (sum:si e ae) + =/ f (~(sub ^rs %n) m `@rs`0x3f80.0000) + =/ s (~(div ^rs %n) f (~(add ^rs %n) m `@rs`0x3f80.0000)) + =/ z (~(mul ^rs %n) s s) + =/ cs=(list @rs) + :~ `@rs`0x3eaa.aaab `@rs`0x3e4c.cccd `@rs`0x3e12.4925 + `@rs`0x3de3.8e39 `@rs`0x3dba.2e8c + == + =/ p2 (roll (flop cs) |=([c=@rs a=@rs] (~(add ^rs %n) (~(mul ^rs %n) a z) c))) + =/ r (~(mul ^rs %n) (~(add ^rs %n) z z) p2) + =/ l1 (~(sub ^rs %n) f (~(mul ^rs %n) s (~(sub ^rs %n) f r))) + =/ efa (~(sun ^rs %n) (abs:si e)) + =/ ef ?:((syn:si e) efa (~(sub ^rs %n) .0 efa)) + [ef l1] :: +pow: [@rs @rs] -> @rs :: :: Returns the power of a floating-point atom to a floating-point exponent. @@ -775,15 +949,9 @@ :: .316.22775 :: Source ++ sqt + :: Correctly-rounded: delegate to the stdlib (SoftFloat) f32 square root. |= x=@rs ^- @rs - ?> (sgn x) - ?: =(.0 x) .0 - =/ g=@rs (div x .2) - |- - =/ n=@rs (mul .0.5 (add g (div x g))) - ?. (gth (abs (sub g n)) rtol) - n - $(g n) + (sqt:^rs x) :: +cbrt: @rs -> @rs :: :: Returns the cube root of a floating-point atom. @@ -809,9 +977,13 @@ :: .1.2599207 :: Source ++ cbt + :: cbrt(x) = sign(x) * exp(log|x| / 3); defined for all reals (unlike pow). |= x=@rs ^- @rs - ?> (sgn x) - (pow x .0.33333333) + ?: !(~(equ ^rs %n) x x) x :: NaN -> NaN + ?: |(=(x `@rs`0x0) =(x `@rs`0x8000.0000)) x :: +-0 -> +-0 + =/ ax `@rs`(dis x 0x7fff.ffff) + =/ r (exp (~(mul ^rs %n) (log ax) `@rs`0x3eaa.aaab)) + ?:(=(1 (rsh [0 31] x)) (~(sub ^rs %n) `@rs`0x0 r) r) :: +arg: @rs -> @rs :: :: Returns the argument of a floating-point atom (real argument = absolute @@ -1337,27 +1509,45 @@ :: Source ++ exp |= x=@rd ^- @rd - :: filter out non-finite arguments - ?: =(x 0x0) .~1 - :: check infinities - ?: =(x 0x7ff0.0000.0000.0000) `@rd`0x7ff0.0000.0000.0000 :: exp(+inf) -> inf - ?: =(x 0xfff0.0000.0000.0000) .~0.0 :: exp(-inf) -> 0 - :: check NaN - ?. (^gte (dis 0x7ff8.0000.0000.0000 x) 0) `@rd`0x7ff8.0000.0000.0000 :: exp(NaN) -> NaN - :: check overflow to infinity - =/ o-threshold `@rd`0x4086.2e42.fefa.39ef :: 709.782712893384, value above which exp(x) overflows - ?: (gth x o-threshold) (mul huge huge) - :: check underflow to zero - =/ u-threshold `@rd`0xc086.2e42.fefa.39ef :: -709.782712893384, value below which exp(x) underflows - ?: (lth x u-threshold) (mul tiny tiny) - :: otherwise, use Taylor series - =/ p .~1 - =/ po .~-1 - =/ i .~1 - |- ^- @rd - ?: (lth (abs (sub po p)) rtol) - p - $(i (add i .~1), p (add p (div (pow-n x i) (factorial i))), po p) + :: Chebyshev: x = k*ln2 + r (Cody-Waite reduction); exp(x) = 2^k * P(r), + :: P a degree-11 minimax polynomial faithful to <=1 ULP. Internals are + :: forced to round-nearest-even: a correctly-rounded transcendental does + :: not take a rounding-mode axis (and the SoftFloat jet will match this). + :: +scale2 is a correctly-rounded ldexp that stays exact across the normal + :: range and rounds exactly once into the overflow/subnormal tails. + =/ pow2 |=(j=@s `@rd`(lsh [0 52] (abs:si (sum:si j --1.023)))) + =/ scale2 + |= [p=@rd k=@s] ^- @rd + ?: (syn:si (dif:si k --1.024)) :: k>1023: (p*2^1023)*2^(k-1023) + (~(mul ^rd %n) (~(mul ^rd %n) p (pow2 --1.023)) (pow2 (dif:si k --1.023))) + ?: !(syn:si (sum:si k --1.022)) :: k<-1022: (p*2^(k+54))*2^-54 + (~(mul ^rd %n) (~(mul ^rd %n) p (pow2 (sum:si k --54))) (pow2 -54)) + (~(mul ^rd %n) p (pow2 k)) + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 :: NaN + ?: =(x `@rd`0x7ff0.0000.0000.0000) `@rd`0x7ff0.0000.0000.0000 :: +inf + ?: =(x `@rd`0xfff0.0000.0000.0000) `@rd`0x0 :: -inf -> 0 + =/ log2e `@rd`0x3ff7.1547.652b.82fe + =/ ln2hi `@rd`0x3fe6.2e42.fee0.0000 + =/ ln2lo `@rd`0x3dea.39ef.3579.3c76 + =/ k=@s (need (~(toi ^rd %n) (~(mul ^rd %n) x log2e))) + ?: (syn:si (dif:si k --1.025)) `@rd`0x7ff0.0000.0000.0000 :: overflow -> inf + ?: !(syn:si (sum:si k --1.075)) `@rd`0x0 :: underflow -> 0 + =/ ka (~(sun ^rd %n) (abs:si k)) + =/ kf ?:((syn:si k) ka (~(sub ^rd %n) .~0 ka)) :: k as @rd + =/ r + %- ~(sub ^rd %n) + :- (~(sub ^rd %n) x (~(mul ^rd %n) kf ln2hi)) + (~(mul ^rd %n) kf ln2lo) + =/ cs=(list @rd) + :~ `@rd`0x3ff0.0000.0000.0000 `@rd`0x3ff0.0000.0000.0000 + `@rd`0x3fe0.0000.0000.0011 `@rd`0x3fc5.5555.5555.555a + `@rd`0x3fa5.5555.5554.f0cf `@rd`0x3f81.1111.1110.f225 + `@rd`0x3f56.c16c.187f.be02 `@rd`0x3f2a.01a0.1b14.378f + `@rd`0x3efa.0199.1ac8.730a `@rd`0x3ec7.1ddf.5749.d126 + `@rd`0x3e92.8b40.57f4.4145 `@rd`0x3e5a.f631.d005.9bec + == + =/ p (roll (flop cs) |=([c=@rd acc=@rd] (~(add ^rd %n) (~(mul ^rd %n) acc r) c))) + (scale2 p k) :: +sin: @rd -> @rd :: :: Returns the sine of a floating-point atom. @@ -1371,25 +1561,14 @@ :: Source ++ sin |= x=@rd ^- @rd - :: filter out non-finite arguments - :: check infinities - ?: =(x 0x7ff0.0000.0000.0000) `@rd`0x7ff8.0000.0000.0000 :: sin(+inf) -> NaN - ?: =(x 0xfff0.0000.0000.0000) `@rd`0x7ff8.0000.0000.0000 :: sin(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7ff8.0000.0000.0000 x) 0) `@rd`0x7ff8.0000.0000.0000 :: sin(NaN) -> NaN - :: map into domain - =. x (mod x tau) - :: otherwise, use Taylor series - =/ p x - =/ po .~-2 - =/ i 1 - =/ term x - |- ^- @rd - ?. (gth (abs term) rtol) - p - =/ i2 (add (sun i) (sun i)) - =. term (mul (neg term) (div (mul x x) (mul i2 (add i2 .~1)))) - $(i +(i), p (add p term), po p) + :: Reduce x = q*(pi/2) + (rhi+rlo) (2-part pi/2), then fdlibm sin/cos + :: kernels picked by q&3. Faithful to <=1 ULP for |x| <~ 2^22. + :: Round-nearest-even internally (the SoftFloat jet matches). + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 + ?: |(=(x `@rd`0x7ff0.0000.0000.0000) =(x `@rd`0xfff0.0000.0000.0000)) `@rd`0x7ff8.0000.0000.0000 + ?: |(=(x `@rd`0x0) =(x `@rd`0x8000.0000.0000.0000)) x :: +-0 -> +-0 + %- trig-fin:rd-trig + [%.y `@rd`(dis x 0x7fff.ffff.ffff.ffff) (rsh [0 63] x)] :: +cos: @rd -> @rd :: :: Returns the cosine of a floating-point atom. @@ -1403,25 +1582,63 @@ :: Source ++ cos |= x=@rd ^- @rd - :: filter out non-finite arguments - :: check infinities - ?: =(x 0x7ff0.0000.0000.0000) `@rd`0x7ff8.0000.0000.0000 :: cos(+inf) -> NaN - ?: =(x 0xfff0.0000.0000.0000) `@rd`0x7ff8.0000.0000.0000 :: cos(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7ff8.0000.0000.0000 x) 0) `@rd`0x7ff8.0000.0000.0000 :: exp(NaN) -> NaN - :: map into domain - =. x (mod x tau) - :: otherwise, use Taylor series - =/ p .~1 - =/ po .~-2 - =/ i 1 - =/ term .~1 - |- ^- @rd - ?. (gth (abs term) rtol) - p - =/ i2 (add (sun i) (sun i)) - =. term (mul (neg term) (div (mul x x) (mul i2 (sub i2 .~1)))) - $(i +(i), p (add p term), po p) + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 + ?: |(=(x `@rd`0x7ff0.0000.0000.0000) =(x `@rd`0xfff0.0000.0000.0000)) `@rd`0x7ff8.0000.0000.0000 + %- trig-fin:rd-trig + [%.n `@rd`(dis x 0x7fff.ffff.ffff.ffff) 0] + :: +rd-trig: shared sin/cos engine for the @rd door (see +sin / +cos). + ++ rd-trig + |% + ++ sc + ^- (list @rd) + :~ `@rd`0xbfc5.5555.5555.5555 `@rd`0x3f81.1111.1111.1111 + `@rd`0xbf2a.01a0.1a01.a01a `@rd`0x3ec7.1de3.a556.c734 + `@rd`0xbe5a.e645.67f5.44e4 `@rd`0x3de6.1246.13a8.6d09 + `@rd`0xbd6a.e7f3.e733.b81f `@rd`0x3ce9.52c7.7030.ad4a + == + ++ cc + ^- (list @rd) + :~ `@rd`0x3fa5.5555.5555.5555 `@rd`0xbf56.c16c.16c1.6c17 + `@rd`0x3efa.01a0.1a01.a01a `@rd`0xbe92.7e4f.b778.9f5c + `@rd`0x3e21.eed8.eff8.d898 `@rd`0xbda9.3974.a8c0.7c9d + `@rd`0x3d2a.e7f3.e733.b81f `@rd`0xbca6.8278.63b9.7d97 + == + ++ neg |=(a=@rd ^-(@rd (~(sub ^rd %n) `@rd`0x0 a))) + ++ ksin + |= [xx=@rd yy=@rd] ^- @rd + =/ z (~(mul ^rd %n) xx xx) + =/ r (roll (flop (tail sc)) |=([c=@rd a=@rd] (~(add ^rd %n) (~(mul ^rd %n) a z) c))) + =/ v (~(mul ^rd %n) z xx) + =/ aa (~(sub ^rd %n) (~(mul ^rd %n) `@rd`0x3fe0.0000.0000.0000 yy) (~(mul ^rd %n) v r)) + =/ bb (~(sub ^rd %n) (~(mul ^rd %n) z aa) yy) + =/ dd (~(sub ^rd %n) bb (~(mul ^rd %n) v (head sc))) + (~(sub ^rd %n) xx dd) + ++ kcos + |= [xx=@rd yy=@rd] ^- @rd + =/ z (~(mul ^rd %n) xx xx) + =/ rc (roll (flop cc) |=([c=@rd a=@rd] (~(add ^rd %n) (~(mul ^rd %n) a z) c))) + =/ hz (~(mul ^rd %n) `@rd`0x3fe0.0000.0000.0000 z) + =/ w2 (~(sub ^rd %n) `@rd`0x3ff0.0000.0000.0000 hz) + =/ aa (~(sub ^rd %n) (~(sub ^rd %n) `@rd`0x3ff0.0000.0000.0000 w2) hz) + =/ bb (~(sub ^rd %n) (~(mul ^rd %n) (~(mul ^rd %n) z z) rc) (~(mul ^rd %n) xx yy)) + (~(add ^rd %n) w2 (~(add ^rd %n) aa bb)) + :: +trig-fin: [is-sin? |x| sign-bit] -> sin x (is-sin?) or cos x + ++ trig-fin + |= [s=? ax=@rd sb=@] ^- @rd + =/ q (need (~(toi ^rd %n) (~(mul ^rd %n) ax `@rd`0x3fe4.5f30.6dc9.c883))) + =/ qf (~(sun ^rd %n) (abs:si q)) + =/ t (~(sub ^rd %n) ax (~(mul ^rd %n) qf `@rd`0x3ff9.21fb.5440.0000)) + =/ w (~(mul ^rd %n) qf `@rd`0x3dd0.b461.1a62.6331) + =/ rhi (~(sub ^rd %n) t w) + =/ rlo (~(sub ^rd %n) (~(sub ^rd %n) t rhi) w) + =/ m (dis (abs:si q) 3) + =/ ks (ksin rhi rlo) + =/ kc (kcos rhi rlo) + ?: s + =/ v ?:(=(m 0) ks ?:(=(m 1) kc ?:(=(m 2) (neg ks) (neg kc)))) + ?:(=(sb 1) (neg v) v) + ?:(=(m 0) kc ?:(=(m 1) (neg ks) ?:(=(m 2) (neg kc) ks))) + -- :: +tan: @rd -> @rd :: :: Returns the tangent of a floating-point atom. @@ -1434,8 +1651,77 @@ :: .~-2.6535896228476087e-6 :: Source ++ tan + :: Dedicated fdlibm __kernel_tan (faithful <=1 ULP); the sin/cos ratio is + :: ~2 ULP. q*pi/2 reduction, odd q uses the -cot path. See +rd-tan. |= x=@rd ^- @rd - (div (sin x) (cos x)) + (main:rd-tan x) + :: +rd-tan: dedicated tangent kernel for the @rd door, see +tan. + ++ rd-tan + |% + ++ redq + |= ax=@rd ^- [q=@s rhi=@rd rlo=@rd] + =/ q (need (~(toi ^rd %n) (~(mul ^rd %n) ax `@rd`0x3fe4.5f30.6dc9.c883))) + =/ qf (~(sun ^rd %n) (abs:si q)) + =/ t (~(sub ^rd %n) ax (~(mul ^rd %n) qf `@rd`0x3ff9.21fb.5440.0000)) + =/ w (~(mul ^rd %n) qf `@rd`0x3dd0.b461.1a62.6331) + =/ rhi (~(sub ^rd %n) t w) + =/ rlo (~(sub ^rd %n) (~(sub ^rd %n) t rhi) w) + [q rhi rlo] + ++ ktan + |= [x=@rd y=@rd iy=@s] ^- @rd + =/ hxneg (rsh [0 63] x) + =/ big (~(gte ^rd %n) `@rd`(dis x 0x7fff.ffff.ffff.ffff) `@rd`0x3fe5.9428.0000.0000) + =/ xa ?:(=(hxneg 1) (~(sub ^rd %n) `@rd`0x0 x) x) + =/ ya ?:(=(hxneg 1) (~(sub ^rd %n) `@rd`0x0 y) y) + =/ xr + ?. big x + (~(add ^rd %n) (~(sub ^rd %n) `@rd`0x3fe9.21fb.5444.2d18 xa) (~(sub ^rd %n) `@rd`0x3c81.a626.3314.5c07 ya)) + =/ yr ?:(big `@rd`0x0 y) + =/ z (~(mul ^rd %n) xr xr) + =/ w (~(mul ^rd %n) z z) + =/ rl=(list @rd) + :~ `@rd`0x3fc1.1111.1110.fe7a `@rd`0x3f96.64f4.8406.d637 + `@rd`0x3f6d.6d22.c956.0328 `@rd`0x3f43.44d8.f2f2.6501 + `@rd`0x3f14.7e88.a037.92a6 `@rd`0xbef3.75cb.db60.5373 + == + =/ vl=(list @rd) + :~ `@rd`0x3fab.a1ba.1bb3.41fe `@rd`0x3f82.26e3.e96e.8493 + `@rd`0x3f57.dbc8.fee0.8315 `@rd`0x3f30.26f7.1a8d.1068 + `@rd`0x3f12.b80f.32f0.a7e9 `@rd`0x3efb.2a70.74bf.7ad4 + == + =/ rr (roll (flop rl) |=([c=@rd a=@rd] (~(add ^rd %n) (~(mul ^rd %n) a w) c))) + =/ vv (~(mul ^rd %n) z (roll (flop vl) |=([c=@rd a=@rd] (~(add ^rd %n) (~(mul ^rd %n) a w) c)))) + =/ s (~(mul ^rd %n) z xr) + =/ r (~(add ^rd %n) yr (~(mul ^rd %n) z (~(add ^rd %n) (~(mul ^rd %n) s (~(add ^rd %n) rr vv)) yr))) + =. r (~(add ^rd %n) r (~(mul ^rd %n) `@rd`0x3fd5.5555.5555.5563 s)) + =/ w2 (~(add ^rd %n) xr r) + ?: big + =/ fac ?:(=(hxneg 1) `@rd`0xbff0.0000.0000.0000 `@rd`0x3ff0.0000.0000.0000) + =/ v ?:(=(iy --1) `@rd`0x3ff0.0000.0000.0000 `@rd`0xbff0.0000.0000.0000) + %+ ~(mul ^rd %n) fac + %+ ~(sub ^rd %n) v + %+ ~(mul ^rd %n) `@rd`0x4000.0000.0000.0000 + %+ ~(sub ^rd %n) xr + (~(sub ^rd %n) (~(div ^rd %n) (~(mul ^rd %n) w2 w2) (~(add ^rd %n) w2 v)) r) + ?: =(iy --1) w2 + =/ zz `@rd`(dis w2 0xffff.ffff.0000.0000) + =/ vv2 (~(sub ^rd %n) r (~(sub ^rd %n) zz xr)) + =/ a (~(div ^rd %n) `@rd`0xbff0.0000.0000.0000 w2) + =/ tt `@rd`(dis a 0xffff.ffff.0000.0000) + =/ ss (~(add ^rd %n) `@rd`0x3ff0.0000.0000.0000 (~(mul ^rd %n) tt zz)) + (~(add ^rd %n) tt (~(mul ^rd %n) a (~(add ^rd %n) ss (~(mul ^rd %n) tt vv2)))) + ++ main + |= x=@rd ^- @rd + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 + ?: |(=(x `@rd`0x7ff0.0000.0000.0000) =(x `@rd`0xfff0.0000.0000.0000)) `@rd`0x7ff8.0000.0000.0000 + ?: |(=(x `@rd`0x0) =(x `@rd`0x8000.0000.0000.0000)) x + =/ neg (rsh [0 63] x) + =/ ax `@rd`(dis x 0x7fff.ffff.ffff.ffff) + =/ red (redq ax) + =/ iy ?:(=(0 (dis (abs:si q.red) 1)) --1 -1) + =/ t (ktan rhi.red rlo.red iy) + ?:(=(neg 1) (~(sub ^rd %n) `@rd`0x0 t) t) + -- :: +asin: @rd -> @rd :: :: Returns the inverse sine of a floating-point atom. @@ -1448,12 +1734,9 @@ :: .~0.7753974965943197 :: ++ asin + :: fdlibm rational kernel; see +rd-ainv. Faithful to <=1 ULP; |x|>1 -> NaN. |= x=@rd ^- @rd - ?. (gte (abs x) .~1) - (atan (div x (sqt (abs (sub .~1 (mul x x)))))) - ?: =(.~1 x) ^~((mul pi .~0.5)) - ?: =(.~-1 x) ^~((mul pi .~-0.5)) - ~|([%asin-out-of-bounds x] !!) + (asn:rd-ainv x) :: +acos: @rd -> @rd :: :: Returns the inverse cosine of a floating-point atom. @@ -1467,12 +1750,78 @@ :: ++ acos |= x=@rd ^- @rd - ?. (gte (abs x) .~1) - ?: =(.~0 x) ^~((mul pi .~0.5)) - (atan (div (sqt (abs (sub .~1 (mul x x)))) x)) - ?: =(.~1 x) .~0 - ?: =(.~-1 x) pi - ~|([%acos-out-of-bounds x] !!) + (acs:rd-ainv x) + :: +rd-ainv: shared asin/acos engine for the @rd door (rational P/Q kernel), + :: see +asin / +acos. + ++ rd-ainv + |% + ++ rr + |= t=@rd ^- @rd + =/ ps=(list @rd) + :~ `@rd`0x3fc5.5555.5555.5555 `@rd`0xbfd4.d612.03eb.6f7d + `@rd`0x3fc9.c155.0e88.4455 `@rd`0xbfa4.8228.b568.8f3b + `@rd`0x3f49.efe0.7501.b288 `@rd`0x3f02.3de1.0dfd.f709 + == + =/ qs=(list @rd) + :~ `@rd`0xc003.3a27.1c8a.2d4b `@rd`0x4000.2ae5.9c59.8ac8 + `@rd`0xbfe6.066c.1b8d.0159 `@rd`0x3fb3.b8c5.b12e.9282 + == + =/ p (~(mul ^rd %n) t (roll (flop ps) |=([c=@rd a=@rd] (~(add ^rd %n) (~(mul ^rd %n) a t) c)))) + =/ q (~(add ^rd %n) `@rd`0x3ff0.0000.0000.0000 (~(mul ^rd %n) t (roll (flop qs) |=([c=@rd a=@rd] (~(add ^rd %n) (~(mul ^rd %n) a t) c))))) + (~(div ^rd %n) p q) + ++ asn + |= x=@rd ^- @rd + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 + =/ sgn (rsh [0 63] x) + =/ ax `@rd`(dis x 0x7fff.ffff.ffff.ffff) + ?: (~(gth ^rd %n) ax `@rd`0x3ff0.0000.0000.0000) `@rd`0x7ff8.0000.0000.0000 + ?: =(ax `@rd`0x3ff0.0000.0000.0000) + (~(add ^rd %n) (~(mul ^rd %n) x `@rd`0x3ff9.21fb.5444.2d18) (~(mul ^rd %n) x `@rd`0x3c91.a626.3314.5c07)) + ?: (~(lth ^rd %n) ax `@rd`0x3fe0.0000.0000.0000) + ?: (~(lth ^rd %n) ax `@rd`0x3e50.0000.0000.0000) x + =/ t (~(mul ^rd %n) x x) + (~(add ^rd %n) x (~(mul ^rd %n) x (rr t))) + =/ w (~(sub ^rd %n) `@rd`0x3ff0.0000.0000.0000 ax) + =/ t (~(mul ^rd %n) w `@rd`0x3fe0.0000.0000.0000) + =/ r (rr t) + =/ s (sqt t) + ?: (~(gte ^rd %n) ax `@rd`0x3fef.3333.0000.0000) + =/ res (~(sub ^rd %n) `@rd`0x3ff9.21fb.5444.2d18 (~(sub ^rd %n) (~(mul ^rd %n) `@rd`0x4000.0000.0000.0000 (~(add ^rd %n) s (~(mul ^rd %n) s r))) `@rd`0x3c91.a626.3314.5c07)) + ?:(=(sgn 1) (~(sub ^rd %n) `@rd`0x0 res) res) + =/ df `@rd`(dis s 0xffff.ffff.0000.0000) + =/ c (~(div ^rd %n) (~(sub ^rd %n) t (~(mul ^rd %n) df df)) (~(add ^rd %n) s df)) + =/ p2 (~(sub ^rd %n) (~(mul ^rd %n) `@rd`0x4000.0000.0000.0000 (~(mul ^rd %n) s r)) (~(sub ^rd %n) `@rd`0x3c91.a626.3314.5c07 (~(mul ^rd %n) `@rd`0x4000.0000.0000.0000 c))) + =/ q2 (~(sub ^rd %n) `@rd`0x3fe9.21fb.5444.2d18 (~(mul ^rd %n) `@rd`0x4000.0000.0000.0000 df)) + =/ res (~(sub ^rd %n) `@rd`0x3fe9.21fb.5444.2d18 (~(sub ^rd %n) p2 q2)) + ?:(=(sgn 1) (~(sub ^rd %n) `@rd`0x0 res) res) + ++ acs + |= x=@rd ^- @rd + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 + =/ neg (rsh [0 63] x) + =/ ax `@rd`(dis x 0x7fff.ffff.ffff.ffff) + ?: (~(gth ^rd %n) ax `@rd`0x3ff0.0000.0000.0000) `@rd`0x7ff8.0000.0000.0000 + ?: =(ax `@rd`0x3ff0.0000.0000.0000) + ?: =(neg 0) `@rd`0x0 + (~(add ^rd %n) `@rd`0x4009.21fb.5444.2d18 (~(mul ^rd %n) `@rd`0x4000.0000.0000.0000 `@rd`0x3c91.a626.3314.5c07)) + ?: (~(lth ^rd %n) ax `@rd`0x3fe0.0000.0000.0000) + ?: (~(lth ^rd %n) ax `@rd`0x3c60.0000.0000.0000) `@rd`0x3ff9.21fb.5444.2d18 + =/ z (~(mul ^rd %n) x x) + =/ r (rr z) + (~(sub ^rd %n) `@rd`0x3ff9.21fb.5444.2d18 (~(sub ^rd %n) x (~(sub ^rd %n) `@rd`0x3c91.a626.3314.5c07 (~(mul ^rd %n) x r)))) + ?: =(neg 1) + =/ z (~(mul ^rd %n) (~(add ^rd %n) `@rd`0x3ff0.0000.0000.0000 x) `@rd`0x3fe0.0000.0000.0000) + =/ s (sqt z) + =/ r (rr z) + =/ w (~(sub ^rd %n) (~(mul ^rd %n) r s) `@rd`0x3c91.a626.3314.5c07) + (~(sub ^rd %n) `@rd`0x4009.21fb.5444.2d18 (~(mul ^rd %n) `@rd`0x4000.0000.0000.0000 (~(add ^rd %n) s w))) + =/ z (~(mul ^rd %n) (~(sub ^rd %n) `@rd`0x3ff0.0000.0000.0000 x) `@rd`0x3fe0.0000.0000.0000) + =/ s (sqt z) + =/ df `@rd`(dis s 0xffff.ffff.0000.0000) + =/ c (~(div ^rd %n) (~(sub ^rd %n) z (~(mul ^rd %n) df df)) (~(add ^rd %n) s df)) + =/ r (rr z) + =/ w (~(add ^rd %n) (~(mul ^rd %n) r s) c) + (~(mul ^rd %n) `@rd`0x4000.0000.0000.0000 (~(add ^rd %n) df w)) + -- :: +atan: @rd -> @rd :: :: Returns the inverse tangent of a floating-point atom. @@ -1485,15 +1834,58 @@ :: .~1.2626272558398273 :: ++ atan + :: fdlibm breakpoint reduction + minimax poly; odd. Round-nearest-even + :: internally (the SoftFloat jet matches). |= x=@rd ^- @rd - =/ a (pow (add .~1 (mul x x)) .~-0.5) - =/ b .~1 - |- - ?. (gth (abs (sub a b)) rtol) - (div x (mul (pow (add .~1 (mul x x)) .~0.5) b)) - =/ ai (mul .~0.5 (add a b)) - =/ bi (sqt (mul ai b)) - $(a ai, b bi) + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 :: NaN + ?: =(x `@rd`0x7ff0.0000.0000.0000) `@rd`0x3ff9.21fb.5444.2d18 :: +inf -> pi/2 + ?: =(x `@rd`0xfff0.0000.0000.0000) `@rd`0xbff9.21fb.5444.2d18 :: -inf -> -pi/2 + ?: |(=(x `@rd`0x0) =(x `@rd`0x8000.0000.0000.0000)) x :: +-0 -> +-0 + =/ neg (rsh [0 63] x) + =/ r (ker:rd-atan `@rd`(dis x 0x7fff.ffff.ffff.ffff)) + ?:(=(neg 1) (~(sub ^rd %n) `@rd`0x0 r) r) + :: +rd-atan: atan kernel for the @rd door (reduction + poly), see +atan. + ++ rd-atan + |% + ++ at + ^- (list @rd) + :~ `@rd`0x3fd5.5555.5555.550d `@rd`0xbfc9.9999.9998.ebc4 + `@rd`0x3fc2.4924.9200.83ff `@rd`0xbfbc.71c6.fe23.1671 + `@rd`0x3fb7.45cd.c54c.206e `@rd`0xbfb3.b0f2.af74.9a6d + `@rd`0x3fb1.0d66.a0d0.3d51 `@rd`0xbfad.de2d.52de.fd9a + `@rd`0x3fa9.7b4b.2476.0deb `@rd`0xbfa2.b444.2c6a.6c2f + `@rd`0x3f90.ad3a.e322.da11 + == + ++ atred + |= ax=@rd ^- [xr=@rd hi=@rd lo=@rd dir=?] + =/ one `@rd`0x3ff0.0000.0000.0000 + =/ two `@rd`0x4000.0000.0000.0000 + =/ ohf `@rd`0x3ff8.0000.0000.0000 + ?: (~(lth ^rd %n) ax `@rd`0x3fdc.0000.0000.0000) + [ax `@rd`0x0 `@rd`0x0 %.y] + ?: (~(lth ^rd %n) ax `@rd`0x3fe6.0000.0000.0000) + :* (~(div ^rd %n) (~(sub ^rd %n) (~(add ^rd %n) ax ax) one) (~(add ^rd %n) two ax)) + `@rd`0x3fdd.ac67.0561.bb4f `@rd`0x3c7a.2b7f.222f.65e2 %.n + == + ?: (~(lth ^rd %n) ax `@rd`0x3ff3.0000.0000.0000) + :* (~(div ^rd %n) (~(sub ^rd %n) ax one) (~(add ^rd %n) ax one)) + `@rd`0x3fe9.21fb.5444.2d18 `@rd`0x3c81.a626.3314.5c07 %.n + == + ?: (~(lth ^rd %n) ax `@rd`0x4003.8000.0000.0000) + :* (~(div ^rd %n) (~(sub ^rd %n) ax ohf) (~(add ^rd %n) one (~(mul ^rd %n) ohf ax))) + `@rd`0x3fef.730b.d281.f69b `@rd`0x3c70.0788.7af0.cbbd %.n + == + :* (~(div ^rd %n) `@rd`0xbff0.0000.0000.0000 ax) + `@rd`0x3ff9.21fb.5444.2d18 `@rd`0x3c91.a626.3314.5c07 %.n + == + ++ ker + |= ax=@rd ^- @rd + =/ q (atred ax) + =/ z (~(mul ^rd %n) xr.q xr.q) + =/ s (~(mul ^rd %n) z (roll (flop at) |=([c=@rd a=@rd] (~(add ^rd %n) (~(mul ^rd %n) a z) c)))) + ?: dir.q (~(sub ^rd %n) xr.q (~(mul ^rd %n) xr.q s)) + (~(sub ^rd %n) hi.q (~(sub ^rd %n) (~(sub ^rd %n) (~(mul ^rd %n) xr.q s) lo.q) xr.q)) + -- :: +atan2: [@rd @rd] -> @rd :: :: Returns the inverse tangent of a floating-point coordinate. @@ -1552,25 +1944,44 @@ :: .~inf :: Source ++ log - |= z=@rd ^- @rd - :: filter out non-finite arguments - :: check infinities - ?: =(z 0x7ff0.0000.0000.0000) `@rd`0x7ff0.0000.0000.0000 :: log(+inf) -> inf - ?: =(z 0xfff0.0000.0000.0000) `@rd`0x7ff8.0000.0000.0000 :: log(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7ff8.0000.0000.0000 z) 0) `@rd`0x7ff8.0000.0000.0000 :: log(NaN) -> NaN - :: otherwise, use Taylor series - =/ p .~0 - =/ po .~-1 - =/ i .~0 - |- ^- @rd - ?: (lth (abs (sub po p)) rtol) - (mul (div (mul .~2 (sub z .~1)) (add z .~1)) p) - =/ term1 (div .~1 (add .~1 (mul .~2 i))) - =/ term2 (mul (sub z .~1) (sub z .~1)) - =/ term3 (mul (add z .~1) (add z .~1)) - =/ term (mul term1 (pow-n (div term2 term3) i)) - $(i (add i .~1), p (add p term), po p) + |= x=@rd ^- @rd + :: Reduce x = 2^e * m with m in [sqrt(1/2), sqrt(2)); then + :: log(x) = e*ln2 + log(1+f), f = m-1, s = f/(2+f), + :: log(1+f) = f - s*(f - 2z*P2(z)), z = s*s, P2 the atanh series + :: 1/3 + z/5 + z^2/7 + ... Faithful to <=1 ULP; round-nearest-even + :: internally (the SoftFloat jet will match this bit-for-bit). + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 :: NaN + ?: =(x `@rd`0x7ff0.0000.0000.0000) `@rd`0x7ff0.0000.0000.0000 :: +inf + ?: |(=(x `@rd`0x0) =(x `@rd`0x8000.0000.0000.0000)) `@rd`0xfff0.0000.0000.0000 :: +-0 -> -inf + ?: =(1 (rsh [0 63] x)) `@rd`0x7ff8.0000.0000.0000 :: x<0 -> NaN + =/ sub =(0 (dis 0x7ff (rsh [0 52] x))) :: subnormal? + =/ xx ?:(sub (~(mul ^rd %n) x `@rd`0x4350.0000.0000.0000) x) :: *2^54 + =/ ae ?:(sub -54 --0) + =/ b `@`xx + =/ ef (dif:si (new:si %.y (dis 0x7ff (rsh [0 52] b))) --1.023) + =/ m `@rd`(con (dis b 0xf.ffff.ffff.ffff) 0x3ff0.0000.0000.0000) + =/ big (~(gte ^rd %n) m `@rd`0x3ff6.a09e.667f.3bcd) :: m >= sqrt(2) + =? m big (~(mul ^rd %n) m `@rd`0x3fe0.0000.0000.0000) :: m * 0.5 + =? ef big (sum:si ef --1) + =. ef (sum:si ef ae) + =/ f (~(sub ^rd %n) m `@rd`0x3ff0.0000.0000.0000) + =/ s (~(div ^rd %n) f (~(add ^rd %n) m `@rd`0x3ff0.0000.0000.0000)) + =/ z (~(mul ^rd %n) s s) + =/ cs=(list @rd) + :~ `@rd`0x3fd5.5555.5555.5555 `@rd`0x3fc9.9999.9999.999a + `@rd`0x3fc2.4924.9249.2492 `@rd`0x3fbc.71c7.1c71.c71c + `@rd`0x3fb7.45d1.745d.1746 `@rd`0x3fb3.b13b.13b1.3b14 + `@rd`0x3fb1.1111.1111.1111 `@rd`0x3fae.1e1e.1e1e.1e1e + `@rd`0x3faa.f286.bca1.af28 `@rd`0x3fa8.6186.1861.8618 + == + =/ p2 (roll (flop cs) |=([c=@rd acc=@rd] (~(add ^rd %n) (~(mul ^rd %n) acc z) c))) + =/ r (~(mul ^rd %n) (~(add ^rd %n) z z) p2) + =/ l1 (~(sub ^rd %n) f (~(mul ^rd %n) s (~(sub ^rd %n) f r))) + =/ efa (~(sun ^rd %n) (abs:si ef)) + =/ ef-f ?:((syn:si ef) efa (~(sub ^rd %n) .~0 efa)) :: e as @rd + =/ hi (~(mul ^rd %n) ef-f `@rd`0x3fe6.2e42.fee0.0000) :: e*ln2hi + =/ lo (~(mul ^rd %n) ef-f `@rd`0x3dea.39ef.3579.3c76) :: e*ln2lo + (~(add ^rd %n) hi (~(add ^rd %n) l1 lo)) :: +log-10: @rd -> @rd :: :: Returns the base-10 logarithm of a floating-point atom. @@ -1583,22 +1994,62 @@ :: .~0.30102999562024696 :: Source ++ log-10 - |= z=@rd ^- @rd - (div (log z) log10) + :: e*log10(2) + log(m)/ln10, reusing +lr so the integer part is added with + :: no division rounding (more accurate than log(x)/ln10). + |= x=@rd ^- @rd + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 + ?: =(x `@rd`0x7ff0.0000.0000.0000) `@rd`0x7ff0.0000.0000.0000 + ?: |(=(x `@rd`0x0) =(x `@rd`0x8000.0000.0000.0000)) `@rd`0xfff0.0000.0000.0000 + ?: =(1 (rsh [0 63] x)) `@rd`0x7ff8.0000.0000.0000 + =/ el (lr x) + (~(add ^rd %n) (~(mul ^rd %n) ef.el `@rd`0x3fd3.4413.509f.79ff) (~(mul ^rd %n) lm.el `@rd`0x3fdb.cb7b.1526.e50e)) :: +log-2: @rd -> @rd :: :: Returns the base-2 logarithm of a floating-point atom. :: Examples - :: > (log-2 .0.1) - :: .~-3.321928094582713 - :: > (log-2 .2) - :: .~0.9999999999985144 - :: > (~(log-2 rs [%z .1e-8]) .2) - :: .~0.9999999998547181 + :: > (log-2 .~2) + :: .~1 + :: > (log-2 .~0.1) + :: .~-3.321928094887362 :: Source ++ log-2 - |= z=@rd ^- @rd - (div (log z) log2) + :: e + log(m)/ln2 (integer part exact); see +lr. + |= x=@rd ^- @rd + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 + ?: =(x `@rd`0x7ff0.0000.0000.0000) `@rd`0x7ff0.0000.0000.0000 + ?: |(=(x `@rd`0x0) =(x `@rd`0x8000.0000.0000.0000)) `@rd`0xfff0.0000.0000.0000 + ?: =(1 (rsh [0 63] x)) `@rd`0x7ff8.0000.0000.0000 + =/ el (lr x) + (~(add ^rd %n) ef.el (~(mul ^rd %n) lm.el `@rd`0x3ff7.1547.652b.82fe)) + :: +lr: log reduction for finite positive x -> [e (as @rd), log(mantissa)]. + ++ lr + |= x=@rd ^- [ef=@rd lm=@rd] + =/ sub =(0 (dis 0x7ff (rsh [0 52] x))) + =/ xx ?:(sub (~(mul ^rd %n) x `@rd`0x4350.0000.0000.0000) x) + =/ ae ?:(sub -54 --0) + =/ b `@`xx + =/ e (dif:si (new:si %.y (dis 0x7ff (rsh [0 52] b))) --1.023) + =/ m `@rd`(con (dis b 0xf.ffff.ffff.ffff) 0x3ff0.0000.0000.0000) + =/ big (~(gte ^rd %n) m `@rd`0x3ff6.a09e.667f.3bcd) + =? m big (~(mul ^rd %n) m `@rd`0x3fe0.0000.0000.0000) + =? e big (sum:si e --1) + =. e (sum:si e ae) + =/ f (~(sub ^rd %n) m `@rd`0x3ff0.0000.0000.0000) + =/ s (~(div ^rd %n) f (~(add ^rd %n) m `@rd`0x3ff0.0000.0000.0000)) + =/ z (~(mul ^rd %n) s s) + =/ cs=(list @rd) + :~ `@rd`0x3fd5.5555.5555.5555 `@rd`0x3fc9.9999.9999.999a + `@rd`0x3fc2.4924.9249.2492 `@rd`0x3fbc.71c7.1c71.c71c + `@rd`0x3fb7.45d1.745d.1746 `@rd`0x3fb3.b13b.13b1.3b14 + `@rd`0x3fb1.1111.1111.1111 `@rd`0x3fae.1e1e.1e1e.1e1e + `@rd`0x3faa.f286.bca1.af28 `@rd`0x3fa8.6186.1861.8618 + == + =/ p2 (roll (flop cs) |=([c=@rd a=@rd] (~(add ^rd %n) (~(mul ^rd %n) a z) c))) + =/ r (~(mul ^rd %n) (~(add ^rd %n) z z) p2) + =/ l1 (~(sub ^rd %n) f (~(mul ^rd %n) s (~(sub ^rd %n) f r))) + =/ efa (~(sun ^rd %n) (abs:si e)) + =/ ef ?:((syn:si e) efa (~(sub ^rd %n) .~0 efa)) + [ef l1] :: +pow: [@rd @rd] -> @rd :: :: Returns the power of a floating-point atom to a floating-point exponent. @@ -1642,15 +2093,19 @@ :: .~316.2277660168379 :: Source ++ sqt + :: Correctly-rounded f64 square root. The stdlib f64 root is only + :: faithful (off by up to 1 ULP), so seed with it and apply one Markstein + :: correction (r = x - g*g via fma; g + (0.5/g)*r), which lands on the + :: correctly-rounded value -- matching the SoftFloat sqrt jet bit-for-bit. |= x=@rd ^- @rd - ?> (sgn x) - ?: =(.~0 x) .~0 - =/ g=@rd (div x .~2) - |- - =/ n=@rd (mul .~0.5 (add g (div x g))) - ?. (gth (abs (sub g n)) rtol) - n - $(g n) + ?: !(~(equ ^rd %n) x x) `@rd`0x7ff8.0000.0000.0000 :: NaN + ?: =(x `@rd`0x7ff0.0000.0000.0000) `@rd`0x7ff0.0000.0000.0000 :: +inf + ?: |(=(x `@rd`0x0) =(x `@rd`0x8000.0000.0000.0000)) x :: +-0 + ?: =(1 (rsh [0 63] x)) `@rd`0x7ff8.0000.0000.0000 :: x<0 -> NaN + =/ g (sqt:^rd x) :: faithful seed + =/ h (~(div ^rd %n) `@rd`0x3fe0.0000.0000.0000 g) :: 0.5/g + =/ r (~(fma ^rd %n) (~(sub ^rd %n) `@rd`0x0 g) g x) :: x - g*g + (~(fma ^rd %n) h r g) :: g + h*r :: +cbrt: @rd -> @rd :: :: Returns the cube root of a floating-point atom. @@ -1676,9 +2131,13 @@ :: .~1.2599210498948716 :: Source ++ cbt + :: cbrt(x) = sign(x) * exp(log|x| / 3); defined for all reals (unlike pow). |= x=@rd ^- @rd - ?> (sgn x) - (pow x .~0.3333333333333333) + ?: !(~(equ ^rd %n) x x) x :: NaN -> NaN + ?: |(=(x `@rd`0x0) =(x `@rd`0x8000.0000.0000.0000)) x :: +-0 -> +-0 + =/ ax `@rd`(dis x 0x7fff.ffff.ffff.ffff) + =/ r (exp (~(mul ^rd %n) (log ax) `@rd`0x3fd5.5555.5555.5555)) + ?:(=(1 (rsh [0 63] x)) (~(sub ^rd %n) `@rd`0x0 r) r) :: +arg: @rd -> @rd :: :: Returns the argument of a floating-point atom (real argument = absolute @@ -2200,29 +2659,47 @@ :: > (exp .~~inf) :: .inf :: Source + :: +widen-hs: f16 -> f32, exact. +narrow-sh: f32 -> f16, correctly-rounded + :: RNE. The @rh transcendentals compute in the (more precise) @rs door and + :: round the result down -- correctly-rounded for f16, since an f32 result is + :: ~2^13 times finer than an f16 ULP. + ++ widen-hs + |= h=@rh ^- @ + =/ hh `@`h + =/ s (lsh [0 16] (dis hh 0x8000)) + =/ e (dis (rsh [0 10] hh) 0x1f) + =/ m (dis hh 0x3ff) + ?: =(e 0x1f) (con s (con 0x7f80.0000 (lsh [0 13] m))) + ?: =(e 0) + ?: =(m 0) s + (con s `@`(~(mul rs [%n .1e-5]) (~(sun rs [%n .1e-5]) m) `@rs`0x3380.0000)) + (con s (con (lsh [0 23] (^add e 112)) (lsh [0 13] m))) + ++ narrow-sh + |= uu=@rs ^- @ + =/ u `@`uu + =/ s (dis (rsh [0 16] u) 0x8000) + =/ e (dis (rsh [0 23] u) 0xff) + =/ m (dis u 0x7f.ffff) + ?: =(e 0xff) (con s ?:(=(m 0) 0x7c00 0x7e00)) + ?: (^gte e 143) (con s 0x7c00) + ?: (^gte e 113) + =/ ne (^sub e 112) + =/ mant (rsh [0 13] m) + =/ rem (dis m 0x1fff) + =/ rup ?|((^gth rem 0x1000) &(=(rem 0x1000) =(1 (dis mant 1)))) + (con s (^add (lsh [0 10] ne) (^add mant ?:(rup 1 0)))) + ?: (^lth e 102) (con s 0x0) + =/ shift (^sub 126 e) + =/ mf (con 0x80.0000 m) + =/ mant (rsh [0 shift] mf) + =/ half (bex (dec shift)) + =/ rem (dis mf (dec (bex shift))) + =/ rup ?|((^gth rem half) &(=(rem half) =(1 (dis mant 1)))) + (con s (^add mant ?:(rup 1 0))) ++ exp + :: compute in @rs, round to f16 (see +narrow-sh). |= x=@rh ^- @rh - :: filter out non-finite arguments - ?: =(x 0x0) .~~1 - :: check infinities - ?: =(x 0x7c00) `@rh`0x7c00 :: exp(+inf) -> inf - ?: =(x 0xfc00) .~~0.0 :: exp(-inf) -> 0 - :: check NaN - ?. (^gte (dis 0x7e00 x) 0) `@rh`0x7e00 :: exp(NaN) -> NaN - :: check overflow to infinity - =/ o-threshold `@rh`0x498c :: 11.091265424003277, value above which exp(x) overflows - ?: (gth x o-threshold) (mul huge huge) - :: check underflow to zero - =/ u-threshold `@rh`0xc98c :: -11.091265424003277, value below which exp(x) underflows - ?: (lth x u-threshold) (mul tiny tiny) - :: otherwise, use Taylor series - =/ p .~~1 - =/ po .~~-1 - =/ i .~~1 - |- ^- @rh - ?: (lth (abs (sub po p)) rtol) - p - $(i (add i .~~1), p (add p (div (pow-n x i) (factorial i))), po p) + `@rh`(narrow-sh (~(exp rs [%n .1e-5]) `@rs`(widen-hs x))) :: +sin: @rh -> @rh :: :: Returns the sine of a floating-point atom. @@ -2236,25 +2713,7 @@ :: Source ++ sin |= x=@rh ^- @rh - :: filter out non-finite arguments - :: check infinities - ?: =(x 0x7c00) `@rh`0x7e00 :: sin(+inf) -> NaN - ?: =(x 0xfc00) `@rh`0x7e00 :: sin(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7e00 x) 0) `@rh`0x7e00 :: sin(NaN) -> NaN - :: map into domain - =. x (mod x tau) - :: otherwise, use Taylor series - =/ p x - =/ po .~~-2 - =/ i 1 - =/ term x - |- ^- @rh - ?. (gth (abs term) rtol) - p - =/ i2 (add (sun i) (sun i)) - =. term (mul (neg term) (div (mul x x) (mul i2 (add i2 .~~1)))) - $(i +(i), p (add p term), po p) + `@rh`(narrow-sh (~(sin rs [%n .1e-5]) `@rs`(widen-hs x))) :: +cos: @rh -> @rh :: :: Returns the cosine of a floating-point atom. @@ -2268,25 +2727,7 @@ :: Source ++ cos |= x=@rh ^- @rh - :: filter out non-finite arguments - :: check infinities - ?: =(x 0x7c00) `@rh`0x7e00 :: cos(+inf) -> NaN - ?: =(x 0xfc00) `@rh`0x7e00 :: cos(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7e00 x) 0) `@rh`0x7e00 :: cos(NaN) -> NaN - :: map into domain - =. x (mod x tau) - :: otherwise, use Taylor series - =/ p .~~1 - =/ po .~~-2 - =/ i 1 - =/ term .~~1 - |- ^- @rh - ?. (gth (abs term) rtol) - p - =/ i2 (add (sun i) (sun i)) - =. term (mul (neg term) (div (mul x x) (mul i2 (sub i2 .~~1)))) - $(i +(i), p (add p term), po p) + `@rh`(narrow-sh (~(cos rs [%n .1e-5]) `@rs`(widen-hs x))) :: +tan: @rh -> @rh :: :: Returns the tangent of a floating-point atom. @@ -2300,7 +2741,7 @@ :: Source ++ tan |= x=@rh ^- @rh - (div (sin x) (cos x)) + `@rh`(narrow-sh (~(tan rs [%n .1e-5]) `@rs`(widen-hs x))) :: +asin: @rh -> @rh :: :: Returns the inverse sine of a floating-point atom. @@ -2314,11 +2755,7 @@ :: ++ asin |= x=@rh ^- @rh - ?. (gte (abs x) .~~1) - (atan (div x (sqt (abs (sub .~~1 (mul x x)))))) - ?: =(.~~1 x) ^~((mul pi .~~0.5)) - ?: =(.~~-1 x) ^~((mul pi .~~-0.5)) - ~|([%asin-out-of-bounds x] !!) + `@rh`(narrow-sh (~(asin rs [%n .1e-5]) `@rs`(widen-hs x))) :: +acos: @rh -> @rh :: :: Returns the inverse cosine of a floating-point atom. @@ -2332,12 +2769,7 @@ :: ++ acos |= x=@rh ^- @rh - ?. (gte (abs x) .~~1) - ?: =(.~~0 x) ^~((mul pi .~~0.5)) - (atan (div (sqt (abs (sub .~~1 (mul x x)))) x)) - ?: =(.~~1 x) .~~0 - ?: =(.~~-1 x) pi - ~|([%acos-out-of-bounds x] !!) + `@rh`(narrow-sh (~(acos rs [%n .1e-5]) `@rs`(widen-hs x))) :: +atan: @rh -> @rh :: :: Returns the inverse tangent of a floating-point atom. @@ -2351,14 +2783,7 @@ :: ++ atan |= x=@rh ^- @rh - =/ a (pow (add .~~1 (mul x x)) .~~-0.5) - =/ b .~~1 - |- - ?. (gth (abs (sub a b)) rtol) - (div x (mul (pow (add .~~1 (mul x x)) .~~0.5) b)) - =/ ai (mul .~~0.5 (add a b)) - =/ bi (sqt (mul ai b)) - $(a ai, b bi) + `@rh`(narrow-sh (~(atan rs [%n .1e-5]) `@rs`(widen-hs x))) :: +atan2: [@rh @rh] -> @rh :: :: Returns the inverse tangent of a floating-point coordinate. @@ -2372,17 +2797,7 @@ :: ++ atan2 |= [y=@rh x=@rh] ^- @rh - ?: (gth x .~~0) - (atan (div y x)) - ?: &((lth x .~~0) (gte y .~~0)) - (add (atan (div y x)) pi) - ?: &((lth x .~~0) (lth y .~~0)) - (sub (atan (div y x)) pi) - ?: &(=(.~~0 x) (gth y .~~0)) - (div pi .~~2) - ?: &(=(.~~0 x) (lth y .~~0)) - (mul .~~-1 (div pi .~~2)) - .~~0 :: undefined + `@rh`(narrow-sh (~(atan2 rs [%n .1e-5]) `@rs`(widen-hs y) `@rs`(widen-hs x))) :: +pow-n: [@rh @rh] -> @rh :: :: Returns the power of a floating-point atom to an integer exponent. @@ -2396,13 +2811,7 @@ :: Source ++ pow-n |= [x=@rh n=@rh] ^- @rh - ?: =(n .~~0) .~~1 - ?> &((gth n .~~0) (is-int n)) - =/ p x - |- ^- @rh - ?: (lth n .~~2) - p - $(n (sub n .~~1), p (mul p x)) + `@rh`(narrow-sh (~(pow-n rs [%n .1e-5]) `@rs`(widen-hs x) `@rs`(widen-hs n))) :: +log: @rh -> @rh :: :: Returns the natural logarithm of a floating-point atom. @@ -2414,25 +2823,8 @@ :: > (~(log rh [%z .~~1e-1]) .~~2) :: .~~0.6904 ++ log - |= z=@rh ^- @rh - :: filter out non-finite arguments - :: check infinities - ?: =(z 0x7c00) `@rh`0x7c00 :: exp(+inf) -> inf - ?: =(z 0xfc00) .~~0.0 :: exp(-inf) -> 0 - :: check NaN - ?. (^gte (dis 0x7e00 z) 0) `@rh`0x7e00 :: exp(NaN) -> NaN - :: otherwise, use Taylor series - =/ p .~~0 - =/ po .~~-1 - =/ i .~~0 - |- ^- @rh - ?: (lth (abs (sub po p)) rtol) - (mul (div (mul .~~2 (sub z .~~1)) (add z .~~1)) p) - =/ term1 (div .~~1 (add .~~1 (mul .~~2 i))) - =/ term2 (mul (sub z .~~1) (sub z .~~1)) - =/ term3 (mul (add z .~~1) (add z .~~1)) - =/ term (mul term1 (pow-n (div term2 term3) i)) - $(i (add i .~~1), p (add p term), po p) + |= x=@rh ^- @rh + `@rh`(narrow-sh (~(log rs [%n .1e-5]) `@rs`(widen-hs x))) :: +log-10: @rh -> @rh :: :: Returns the base-10 logarithm of a floating-point atom. @@ -2440,8 +2832,8 @@ :: TODO :: Source ++ log-10 - |= z=@rh ^- @rh - (div (log z) log10) + |= x=@rh ^- @rh + `@rh`(narrow-sh (~(log-10 rs [%n .1e-5]) `@rs`(widen-hs x))) :: +log-2: @rh -> @rh :: :: Returns the base-2 logarithm of a floating-point atom. @@ -2449,8 +2841,8 @@ :: TODO :: Source ++ log-2 - |= z=@rh ^- @rh - (div (log z) log2) + |= x=@rh ^- @rh + `@rh`(narrow-sh (~(log-2 rs [%n .1e-5]) `@rs`(widen-hs x))) :: +pow: [@rh @rh] -> @rh :: :: Returns the power of a floating-point atom to a floating-point exponent. @@ -2464,9 +2856,7 @@ :: Source ++ pow |= [x=@rh n=@rh] ^- @rh - :: fall through on positive integers (faster) - ?: &(=(n (san (need (toi n)))) (gth n .~~0)) (pow-n x (san (need (toi n)))) - (exp (mul n (log x))) + `@rh`(narrow-sh (~(pow rs [%n .1e-5]) `@rs`(widen-hs x) `@rs`(widen-hs n))) :: +sqrt: @rh -> @rh :: :: Returns the square root of a floating-point atom. @@ -2493,14 +2883,7 @@ :: Source ++ sqt |= x=@rh ^- @rh - ?> (sgn x) - ?: =(.~~0 x) .~~0 - =/ g=@rh (div x .~~2) - |- - =/ n=@rh (mul .~~0.5 (add g (div x g))) - ?. (gth (abs (sub g n)) rtol) - n - $(g n) + `@rh`(narrow-sh (~(sqt rs [%n .1e-5]) `@rs`(widen-hs x))) :: +cbrt: @rh -> @rh :: :: Returns the cube root of a floating-point atom. @@ -2527,8 +2910,7 @@ :: Source ++ cbt |= x=@rh ^- @rh - ?> (sgn x) - (pow x .~~0.3333) + `@rh`(narrow-sh (~(cbt rs [%n .1e-5]) `@rs`(widen-hs x))) :: +arg: @rh -> @rh :: :: Returns the argument of a floating-point atom (real argument = absolute @@ -2986,28 +3368,49 @@ :: .~~~inf :: Source ++ exp + :: Cody-Waite reduction + degree-24 minimax (f128); round-nearest-even + :: internally (matches the SoftFloat jet, see tools/rq_check.c). |= x=@rq ^- @rq - :: filter out non-finite arguments - ?: =(x 0x0) .~~~1 - :: check infinities - ?: =(x 0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000 :: exp(+inf) -> inf - ?: =(x 0xffff.0000.0000.0000.0000.0000.0000.0000) .~~~0.0 :: exp(-inf) -> 0 - :: check NaN - ?. (^gte (dis 0x7fff.8000.0000.0000.0000.0000.0000.0000 x) 0) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 :: exp(NaN) -> NaN - :: check overflow to infinity - =/ o-threshold `@rq`0x400c.62e4.2fef.a39e.f357.93c7.6730.0601 :: 1.135652340629414394949193107797e4, value above which exp(x) overflows - ?: (gth x o-threshold) (mul huge huge) - :: check underflow to zero - =/ u-threshold `@rq`0xc00c.62e4.2fef.a39e.f357.93c7.6730.0601 :: -1.135652340629414394949193107797e4, value below which exp(x) underflows - ?: (lth x u-threshold) (mul tiny tiny) - :: otherwise, use Taylor series - =/ p .~~~1 - =/ po .~~~-1 - =/ i .~~~1 - |- ^- @rq - ?: (lth (abs (sub po p)) rtol) - p - $(i (add i .~~~1), p (add p (div (pow-n x i) (factorial i))), po p) + =/ pow2 |=(j=@s `@rq`(lsh [0 112] (abs:si (sum:si j --16.383)))) + =/ scale2 + |= [p=@rq k=@s] ^- @rq + ?: (syn:si (dif:si k --16.384)) + (~(mul ^rq %n) (~(mul ^rq %n) p (pow2 --16.383)) (pow2 (dif:si k --16.383))) + ?: !(syn:si (sum:si k --16.382)) + (~(mul ^rq %n) (~(mul ^rq %n) p (pow2 (sum:si k --112))) (pow2 -112)) + (~(mul ^rq %n) p (pow2 k)) + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: =(x `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000 + ?: =(x `@rq`0xffff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x0 + =/ log2e `@rq`0x3fff.7154.7652.b82f.e177.7d0f.fda0.d23a + =/ ln2hi `@rq`0x3ffe.62e4.2fef.a39e.f357.93c8.0000.0000 + =/ ln2lo `@rq`0xbfad.319f.f034.2542.fc32.f366.359d.274a + =/ k=@s (need (~(toi ^rq %n) (~(mul ^rq %n) x log2e))) + ?: (syn:si (dif:si k --16.385)) `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000 + ?: !(syn:si (sum:si k --16.494)) `@rq`0x0 + =/ ka (~(sun ^rq %n) (abs:si k)) + =/ kf ?:((syn:si k) ka (~(sub ^rq %n) `@rq`0x0 ka)) + =/ r + %- ~(sub ^rq %n) + :- (~(sub ^rq %n) x (~(mul ^rq %n) kf ln2hi)) + (~(mul ^rq %n) kf ln2lo) + =/ cs=(list @rq) + :~ `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 + `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000 `@rq`0x3ffc.5555.5555.5555.5555.5555.5555.5555 + `@rq`0x3ffa.5555.5555.5555.5555.5555.5555.5555 `@rq`0x3ff8.1111.1111.1111.1111.1111.1111.1111 + `@rq`0x3ff5.6c16.c16c.16c1.6c16.c16c.16c1.6c17 `@rq`0x3ff2.a01a.01a0.1a01.a01a.01a0.1a01.a3e8 + `@rq`0x3fef.a01a.01a0.1a01.a01a.01a0.1a01.a146 `@rq`0x3fec.71de.3a55.6c73.38fa.ac1c.88a5.a526 + `@rq`0x3fe9.27e4.fb77.89f5.c72e.f016.d3d6.e867 `@rq`0x3fe5.ae64.567f.544e.38fe.7483.63c4.6e8b + `@rq`0x3fe2.1eed.8eff.8d89.7b54.4dab.18f4.75c5 `@rq`0x3fde.6124.613a.86d0.97c9.f3ae.babb.2423 + `@rq`0x3fda.9397.4a8c.07c9.d20b.83c7.f94d.17d8 `@rq`0x3fd6.ae7f.3e73.3b81.f5f4.284f.0d74.f9e7 + `@rq`0x3fd2.ae7f.3e73.3b81.f417.b4d2.7c5f.92a9 `@rq`0x3fce.952c.7703.0a99.6a41.9e67.4779.c97c + `@rq`0x3fca.6827.863b.97b5.0466.ff8c.8b42.b3df `@rq`0x3fc6.2f49.b469.f892.874b.7a68.6d81.9241 + `@rq`0x3fc1.e542.ba42.7463.bb3b.32a1.1bb5.f139 `@rq`0x3fbd.71b8.db9f.7f73.c938.90ff.9ab5.5cbb + `@rq`0x3fb9.0ce3.8aab.7bd7.6efc.0717.eae7.85a1 `@rq`0x3fb4.7693.274b.ab2a.cb3f.4f7e.dfaa.2666 + `@rq`0x3faf.f362.9154.e0a7.61cb.0e23.655d.47cb + == + =/ p (roll (flop cs) |=([c=@rq acc=@rq] (~(add ^rq %n) (~(mul ^rq %n) acc r) c))) + (scale2 p k) :: +sin: @rq -> @rq :: :: Returns the sine of a floating-point atom. @@ -3020,26 +3423,88 @@ :: .~~~2.4143733100361875441251426417684949e-23 :: Source ++ sin + :: q*pi/2 reduction + fdlibm kernels (f128); see +rq-trig. |= x=@rq ^- @rq - :: filter out non-finite arguments - :: check infinities - ?: =(x 0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 :: sin(+inf) -> NaN - ?: =(x 0xffff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 :: sin(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7fff.8000.0000.0000.0000.0000.0000.0000 x) 0) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 :: sin(NaN) -> NaN - :: map into domain - =. x (mod x tau) - :: otherwise, use Taylor series - =/ p x - =/ po .~~~-2 - =/ i 1 - =/ term x - |- ^- @rq - ?. (gth (abs term) rtol) - p - =/ i2 (add (sun i) (sun i)) - =. term (mul (neg term) (div (mul x x) (mul i2 (add i2 .~~~1)))) - $(i +(i), p (add p term), po p) + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: |(=(x `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000) =(x `@rq`0xffff.0000.0000.0000.0000.0000.0000.0000)) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: |(=(x `@rq`0x0) =(x `@rq`0x8000.0000.0000.0000.0000.0000.0000.0000)) x + %- trig-fin:rq-trig + [%.y `@rq`(dis x 0x7fff.ffff.ffff.ffff.ffff.ffff.ffff.ffff) (rsh [0 127] x)] + ++ rq-trig + |% + ++ sc + ^- (list @rq) + :~ `@rq`0xbffc.5555.5555.5555.5555.5555.5555.5555 + `@rq`0x3ff8.1111.1111.1111.1111.1111.1111.1111 + `@rq`0xbff2.a01a.01a0.1a01.a01a.01a0.1a01.a01a + `@rq`0x3fec.71de.3a55.6c73.38fa.ac1c.88e5.0017 + `@rq`0xbfe5.ae64.567f.544e.38fe.747e.4b83.7dc7 + `@rq`0x3fde.6124.613a.86d0.97ca.3833.1d23.af68 + `@rq`0xbfd6.ae7f.3e73.3b81.f11d.8656.b0ee.8cb0 + `@rq`0x3fce.952c.7703.0ad4.a6b2.6051.9777.1b00 + `@rq`0xbfc6.2f49.b468.1415.724c.a1ec.3b7b.9675 + `@rq`0x3fbd.71b8.ef6d.cf57.18be.f146.fcee.6e45 + `@rq`0xbfb4.761b.4131.6381.9d97.b870.4dd7.f628 + `@rq`0x3fab.3f3c.cdd1.65fa.8d4e.44a4.1977.6f11 + `@rq`0xbfa1.d1ab.1c2d.ccea.320a.9a18.f15d.4277 + `@rq`0x3f98.259f.98b4.358a.d7ab.e30e.7766.f129 + `@rq`0xbf8e.434d.2e78.3f5b.c42e.1ee4.6fa6.bfc4 + `@rq`0x3f84.3981.254d.d0d5.1b53.82cd.ffa9.7422 + == + ++ cc + ^- (list @rq) + :~ `@rq`0x3ffa.5555.5555.5555.5555.5555.5555.5555 + `@rq`0xbff5.6c16.c16c.16c1.6c16.c16c.16c1.6c17 + `@rq`0x3fef.a01a.01a0.1a01.a01a.01a0.1a01.a01a + `@rq`0xbfe9.27e4.fb77.89f5.c72e.f016.d3ea.6679 + `@rq`0x3fe2.1eed.8eff.8d89.7b54.4da9.87ac.fe85 + `@rq`0xbfda.9397.4a8c.07c9.d20b.adf1.45df.a3e5 + `@rq`0x3fd2.ae7f.3e73.3b81.f11d.8656.b0ee.8cb0 + `@rq`0xbfca.6827.863b.97d9.77bb.0048.86a2.c2ab + `@rq`0x3fc1.e542.ba40.2022.507a.9cad.2bf8.f0bb + `@rq`0xbfb9.0ce3.96db.7f85.2945.0c90.b7f3.38ec + `@rq`0x3faf.f2cf.0197.2f57.7cca.4b40.67ca.9d8a + `@rq`0xbfa6.88e8.5fc6.a4e5.9a38.f205.0ba6.b015 + `@rq`0x3f9d.0a18.a263.5085.d373.c5c5.1c35.4a8d + `@rq`0xbf93.3932.c504.7d60.e60c.aded.4c29.89c5 + `@rq`0x3f89.434d.2e78.3f5b.c42e.1ee4.6fa6.bfc4 + `@rq`0xbf7f.2710.231c.0fd7.a13f.8a2b.4af9.d6b7 + == + ++ neg |=(a=@rq ^-(@rq (~(sub ^rq %n) `@rq`0x0 a))) + ++ ksin + |= [xx=@rq yy=@rq] ^- @rq + =/ z (~(mul ^rq %n) xx xx) + =/ r (roll (flop (tail sc)) |=([c=@rq a=@rq] (~(add ^rq %n) (~(mul ^rq %n) a z) c))) + =/ v (~(mul ^rq %n) z xx) + =/ aa (~(sub ^rq %n) (~(mul ^rq %n) `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000 yy) (~(mul ^rq %n) v r)) + =/ bb (~(sub ^rq %n) (~(mul ^rq %n) z aa) yy) + =/ dd (~(sub ^rq %n) bb (~(mul ^rq %n) v (head sc))) + (~(sub ^rq %n) xx dd) + ++ kcos + |= [xx=@rq yy=@rq] ^- @rq + =/ z (~(mul ^rq %n) xx xx) + =/ rc (roll (flop cc) |=([c=@rq a=@rq] (~(add ^rq %n) (~(mul ^rq %n) a z) c))) + =/ hz (~(mul ^rq %n) `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000 z) + =/ w2 (~(sub ^rq %n) `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 hz) + =/ aa (~(sub ^rq %n) (~(sub ^rq %n) `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 w2) hz) + =/ bb (~(sub ^rq %n) (~(mul ^rq %n) (~(mul ^rq %n) z z) rc) (~(mul ^rq %n) xx yy)) + (~(add ^rq %n) w2 (~(add ^rq %n) aa bb)) + ++ trig-fin + |= [s=? ax=@rq sb=@] ^- @rq + =/ q (need (~(toi ^rq %n) (~(mul ^rq %n) ax `@rq`0x3ffe.45f3.06dc.9c88.2a53.f84e.afa3.ea6a))) + =/ qf (~(sun ^rq %n) (abs:si q)) + =/ t (~(sub ^rq %n) ax (~(mul ^rq %n) qf `@rq`0x3fff.921f.b544.42d1.8460.0000.0000.0000)) + =/ w (~(mul ^rq %n) qf `@rq`0x3fc2.3131.98a2.e037.0734.4a40.9382.229a) + =/ rhi (~(sub ^rq %n) t w) + =/ rlo (~(sub ^rq %n) (~(sub ^rq %n) t rhi) w) + =/ m (dis (abs:si q) 3) + =/ ks (ksin rhi rlo) + =/ kc (kcos rhi rlo) + ?: s + =/ v ?:(=(m 0) ks ?:(=(m 1) kc ?:(=(m 2) (neg ks) (neg kc)))) + ?:(=(sb 1) (neg v) v) + ?:(=(m 0) kc ?:(=(m 1) (neg ks) ?:(=(m 2) (neg kc) ks))) + -- :: +cos: @rq -> @rq :: :: Returns the cosine of a floating-point atom. @@ -3053,25 +3518,10 @@ :: Source ++ cos |= x=@rq ^- @rq - :: filter out non-finite arguments - :: check infinities - ?: =(x 0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 :: cos(+inf) -> NaN - ?: =(x 0xffff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 :: cos(-inf) -> NaN - :: check NaN - ?. (^gte (dis 0x7fff.8000.0000.0000.0000.0000.0000.0000 x) 0) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 :: cos(NaN) -> NaN - :: map into domain - =. x (mod x tau) - :: otherwise, use Taylor series - =/ p .~~~1 - =/ po .~~~-2 - =/ i 1 - =/ term .~~~1 - |- ^- @rq - ?. (gth (abs term) rtol) - p - =/ i2 (add (sun i) (sun i)) - =. term (mul (neg term) (div (mul x x) (mul i2 (sub i2 .~~~1)))) - $(i +(i), p (add p term), po p) + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: |(=(x `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000) =(x `@rq`0xffff.0000.0000.0000.0000.0000.0000.0000)) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + %- trig-fin:rq-trig + [%.n `@rq`(dis x 0x7fff.ffff.ffff.ffff.ffff.ffff.ffff.ffff) 0] :: +tan: @rq -> @rq :: :: Returns the tangent of a floating-point atom. @@ -3098,12 +3548,86 @@ :: .~~~0.7753974966107530637394463388579305 :: ++ asin + :: fdlibm rational-form kernel (poly R, deg-30) + sqrt head/tail; f128. |= x=@rq ^- @rq - ?. (gte (abs x) .~~~1) - (atan (div x (sqt (abs (sub .~~~1 (mul x x)))))) - ?: =(.~~~1 x) ^~((mul pi .~~~0.5)) - ?: =(.~~~-1 x) ^~((mul pi .~~~-0.5)) - ~|([%asin-out-of-bounds x] !!) + (asn:rq-ainv x) + ++ acos + |= x=@rq ^- @rq + (acs:rq-ainv x) + ++ rq-ainv + |% + ++ rr + |= t=@rq ^- @rq + =/ cs=(list @rq) + :~ `@rq`0x3f80.8991.2e54.d43f.83f4.00d5.0d55.a7e8 `@rq`0x3ffc.5555.5555.5555.5555.5555.5555.5552 + `@rq`0x3ffb.3333.3333.3333.3333.3333.3333.5009 `@rq`0x3ffa.6db6.db6d.b6db.6db6.db6d.b668.951b + `@rq`0x3ff9.f1c7.1c71.c71c.71c7.1c72.bac1.ec0e `@rq`0x3ff9.6e8b.a2e8.ba2e.8ba2.e81a.a31a.41d8 + `@rq`0x3ff9.1c4e.c4ec.4ec4.ec4f.0b6f.e680.df37 `@rq`0x3ff8.c999.9999.9999.996c.f372.753e.7c99 + `@rq`0x3ff8.7a87.8787.8787.9217.7cc3.2753.53f7 `@rq`0x3ff8.3fde.50d7.9433.f7f1.afe5.cbea.c090 + `@rq`0x3ff8.12ef.3cf3.cf83.f127.037c.33f0.fb5b `@rq`0x3ff7.df3b.d37a.5ede.5c11.4e9d.a47d.edfd + `@rq`0x3ff7.a686.3d72.3133.909e.0729.7f5e.2958 `@rq`0x3ff7.782d.d9f3.ff64.52ce.6cb8.56ef.901f + `@rq`0x3ff7.51ba.328f.884c.2fdb.62f4.1c70.9bd1 `@rq`0x3ff7.3168.1fe6.e02d.b0d4.9614.21ef.a7ec + `@rq`0x3ff7.15ef.e556.e52a.3408.e80b.5b3f.8641 `@rq`0x3ff6.fc96.253e.cb71.1812.7122.68a4.5b42 + `@rq`0x3ff6.d4a8.2428.408a.ebe9.bc6e.47ec.09af `@rq`0x3ff6.aa37.7fe9.13f6.c0a9.facb.9451.1de4 + `@rq`0x3ff6.b48c.a21a.48d1.8473.7463.fe86.56d8 `@rq`0x3ff5.7e9a.a4b5.b4c4.e391.3451.8885.e78f + `@rq`0x3ff8.1906.4c51.85fa.a0d0.3ab9.c514.26b2 `@rq`0xbff9.300f.0da2.da1e.1c08.5f96.2a89.aacc + `@rq`0x3ffb.0643.398c.dbcb.97d2.5a1b.e10b.6c8a `@rq`0xbffc.27ed.3dd5.cd82.528e.0d54.bf4f.5e05 + `@rq`0x3ffd.1d64.319b.e957.ad88.0c8c.d533.b68b `@rq`0xbffd.9731.e485.678b.81c3.902a.2c54.acc7 + `@rq`0x3ffd.ae10.872f.69b7.a99a.a13d.6e9c.d204 `@rq`0xbffd.228f.c652.7609.25c1.64c7.f610.91fa + `@rq`0x3ffb.9aa4.ca63.cbd7.2c15.b8ad.9b23.77ce + == + (roll (flop cs) |=([c=@rq a=@rq] (~(add ^rq %n) (~(mul ^rq %n) a t) c))) + ++ asn + |= x=@rq ^- @rq + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + =/ sgn (rsh [0 127] x) + =/ ax `@rq`(dis x 0x7fff.ffff.ffff.ffff.ffff.ffff.ffff.ffff) + ?: (~(gth ^rq %n) ax `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: =(ax `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000) (~(add ^rq %n) (~(mul ^rq %n) x `@rq`0x3fff.921f.b544.42d1.8469.898c.c517.01b8) (~(mul ^rq %n) x `@rq`0x3f8c.cd12.9024.e088.a67c.c740.20bb.ea64)) + ?: (~(lth ^rq %n) ax `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000) + ?: (~(lth ^rq %n) ax `@rq`0x3fc6.0000.0000.0000.0000.0000.0000.0000) x + (~(add ^rq %n) x (~(mul ^rq %n) x (rr (~(mul ^rq %n) x x)))) + =/ w (~(sub ^rq %n) `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 ax) + =/ t (~(mul ^rq %n) w `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000) + =/ r (rr t) + =/ s (sqt t) + ?: (~(gte ^rq %n) ax `@rq`0x3ffe.f333.3333.3333.3333.3333.3333.3333) + =/ res (~(sub ^rq %n) `@rq`0x3fff.921f.b544.42d1.8469.898c.c517.01b8 (~(sub ^rq %n) (~(mul ^rq %n) `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 (~(add ^rq %n) s (~(mul ^rq %n) s r))) `@rq`0x3f8c.cd12.9024.e088.a67c.c740.20bb.ea64)) + ?:(=(sgn 1) (~(sub ^rq %n) `@rq`0x0 res) res) + =/ df `@rq`(dis s 0xffff.ffff.ffff.ffff.ff00.0000.0000.0000) + =/ c (~(div ^rq %n) (~(sub ^rq %n) t (~(mul ^rq %n) df df)) (~(add ^rq %n) s df)) + =/ p2 (~(sub ^rq %n) (~(mul ^rq %n) `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 (~(mul ^rq %n) s r)) (~(sub ^rq %n) `@rq`0x3f8c.cd12.9024.e088.a67c.c740.20bb.ea64 (~(mul ^rq %n) `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 c))) + =/ q2 (~(sub ^rq %n) `@rq`0x3ffe.921f.b544.42d1.8469.898c.c517.01b8 (~(mul ^rq %n) `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 df)) + =/ res (~(sub ^rq %n) `@rq`0x3ffe.921f.b544.42d1.8469.898c.c517.01b8 (~(sub ^rq %n) p2 q2)) + ?:(=(sgn 1) (~(sub ^rq %n) `@rq`0x0 res) res) + ++ acs + |= x=@rq ^- @rq + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + =/ neg (rsh [0 127] x) + =/ ax `@rq`(dis x 0x7fff.ffff.ffff.ffff.ffff.ffff.ffff.ffff) + ?: (~(gth ^rq %n) ax `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: =(ax `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000) + ?: =(neg 0) `@rq`0x0 + (~(add ^rq %n) `@rq`0x4000.921f.b544.42d1.8469.898c.c517.01b8 (~(mul ^rq %n) `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 `@rq`0x3f8c.cd12.9024.e088.a67c.c740.20bb.ea64)) + ?: (~(lth ^rq %n) ax `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000) + ?: (~(lth ^rq %n) ax `@rq`0x3f87.0000.0000.0000.0000.0000.0000.0000) `@rq`0x3fff.921f.b544.42d1.8469.898c.c517.01b8 + =/ z (~(mul ^rq %n) x x) + =/ r (rr z) + (~(sub ^rq %n) `@rq`0x3fff.921f.b544.42d1.8469.898c.c517.01b8 (~(sub ^rq %n) x (~(sub ^rq %n) `@rq`0x3f8c.cd12.9024.e088.a67c.c740.20bb.ea64 (~(mul ^rq %n) x r)))) + ?: =(neg 1) + =/ z (~(mul ^rq %n) (~(add ^rq %n) `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 x) `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000) + =/ s (sqt z) + =/ r (rr z) + =/ w (~(sub ^rq %n) (~(mul ^rq %n) r s) `@rq`0x3f8c.cd12.9024.e088.a67c.c740.20bb.ea64) + (~(sub ^rq %n) `@rq`0x4000.921f.b544.42d1.8469.898c.c517.01b8 (~(mul ^rq %n) `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 (~(add ^rq %n) s w))) + =/ z (~(mul ^rq %n) (~(sub ^rq %n) `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 x) `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000) + =/ s (sqt z) + =/ df `@rq`(dis s 0xffff.ffff.ffff.ffff.ff00.0000.0000.0000) + =/ c (~(div ^rq %n) (~(sub ^rq %n) z (~(mul ^rq %n) df df)) (~(add ^rq %n) s df)) + =/ r (rr z) + =/ w (~(add ^rq %n) (~(mul ^rq %n) r s) c) + (~(mul ^rq %n) `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 (~(add ^rq %n) df w)) + -- :: +acos: @rq -> @rq :: :: Returns the inverse cosine of a floating-point atom. @@ -3115,14 +3639,6 @@ :: > (acos .~~~0.7) :: .~~~0.7953988301841435554899943710156033 :: - ++ acos - |= x=@rq ^- @rq - ?. (gte (abs x) .~~~1) - ?: =(.~~~0 x) ^~((mul pi .~~~0.5)) - (atan (div (sqt (abs (sub .~~~1 (mul x x)))) x)) - ?: =(.~~~1 x) .~~~0 - ?: =(.~~~-1 x) pi - ~|([%acos-out-of-bounds x] !!) :: +atan: @rq -> @rq :: :: Returns the inverse tangent of a floating-point atom. @@ -3135,15 +3651,57 @@ :: .~~~1.2626272556789116834540013074115034 :: ++ atan + :: fdlibm breakpoint reduction + degree-30 minimax (f128); odd. |= x=@rq ^- @rq - =/ a (pow (add .~~~1 (mul x x)) .~~~-0.5) - =/ b .~~~1 - |- - ?. (gth (abs (sub a b)) rtol) - (div x (mul (pow (add .~~~1 (mul x x)) .~~~0.5) b)) - =/ ai (mul .~~~0.5 (add a b)) - =/ bi (sqt (mul ai b)) - $(a ai, b bi) + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: =(x `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x3fff.921f.b544.42d1.8469.898c.c517.01b8 + ?: =(x `@rq`0xffff.0000.0000.0000.0000.0000.0000.0000) `@rq`0xbfff.921f.b544.42d1.8469.898c.c517.01b8 + ?: |(=(x `@rq`0x0) =(x `@rq`0x8000.0000.0000.0000.0000.0000.0000.0000)) x + =/ neg (rsh [0 127] x) + =/ r (ker:rq-atan `@rq`(dis x 0x7fff.ffff.ffff.ffff.ffff.ffff.ffff.ffff)) + ?:(=(neg 1) (~(sub ^rq %n) `@rq`0x0 r) r) + ++ rq-atan + |% + ++ at + ^- (list @rq) + :~ `@rq`0x3ffd.5555.5555.5555.5555.5555.5555.5555 `@rq`0xbffc.9999.9999.9999.9999.9999.9999.999a + `@rq`0x3ffc.2492.4924.9249.2492.4924.9249.2492 `@rq`0xbffb.c71c.71c7.1c71.c71c.71c7.1c71.c705 + `@rq`0x3ffb.745d.1745.d174.5d17.45d1.745c.f720 `@rq`0xbffb.3b13.b13b.13b1.3b13.b13b.1395.a0f6 + `@rq`0x3ffb.1111.1111.1111.1111.1111.010e.24e1 `@rq`0xbffa.e1e1.e1e1.e1e1.e1e1.e1d4.8fd7.bd0f + `@rq`0x3ffa.af28.6bca.1af2.86bc.9d8a.1266.1ce3 `@rq`0xbffa.8618.6186.1861.8617.62af.171f.46fb + `@rq`0x3ffa.642c.8590.b216.4297.f77f.1796.654a `@rq`0xbffa.47ae.147a.e147.a6ae.eb97.4d91.c763 + `@rq`0x3ffa.2f68.4bda.12f5.982c.8384.0df4.8c76 `@rq`0xbffa.1a7b.9611.a7a0.f241.3484.8b6f.9bc3 + `@rq`0x3ffa.0842.1084.1eed.46f1.272e.8718.edfe `@rq`0xbff9.f07c.1f07.73e1.dac4.76af.1946.ed1a + `@rq`0x3ff9.d41d.41cf.56a0.771b.4773.d1fd.bc46 `@rq`0xbff9.bacf.910c.a5ea.d9a2.c9f0.ffa2.8317 + `@rq`0x3ff9.a41a.3ed6.e709.5da3.c3e4.8cd5.5593 `@rq`0xbff9.8f9b.fe02.ad67.4d8a.c158.72fb.b51c + `@rq`0x3ff9.7d05.170d.1702.069f.17b9.5f6e.54f4 `@rq`0xbff9.6c10.bc42.d041.6ea0.5add.542a.f078 + `@rq`0x3ff9.5c74.e7f6.f412.eab1.21f2.0635.a41a `@rq`0xbff9.4dac.2e21.aa0e.283b.7552.8258.306b + `@rq`0x3ff9.3e57.3faa.c561.db9d.4b05.d70c.99cf `@rq`0xbff9.2af3.2cae.28f7.4f59.6690.8860.d11a + `@rq`0x3ff9.0c8b.03c5.5304.dec0.acdf.4b35.6e20 `@rq`0xbff8.b4ed.3a33.49ac.f7ad.9702.76c5.cf2b + `@rq`0x3ff8.261c.1a9e.da3a.d5dd.688f.198a.e3cb `@rq`0xbff7.1a7a.c449.b285.876f.4b57.d627.e71e + `@rq`0x3ff5.1a4e.a418.ebe8.1381.31c1.5128.032a + == + ++ atred + |= ax=@rq ^- [xr=@rq hi=@rq lo=@rq dir=?] + ?: (~(lth ^rq %n) ax `@rq`0x3ffd.c000.0000.0000.0000.0000.0000.0000) [ax `@rq`0x0 `@rq`0x0 %.y] + ?: (~(lth ^rq %n) ax `@rq`0x3ffe.6000.0000.0000.0000.0000.0000.0000) + :* (~(div ^rq %n) (~(sub ^rq %n) (~(add ^rq %n) ax ax) `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000) (~(add ^rq %n) `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 ax)) + `@rq`0x3ffd.dac6.7056.1bb4.f68a.dfc8.8bd9.7875 `@rq`0x3f89.a06d.c282.b0e4.c39b.e01c.59e2.dcdd %.n == + ?: (~(lth ^rq %n) ax `@rq`0x3fff.3000.0000.0000.0000.0000.0000.0000) + :* (~(div ^rq %n) (~(sub ^rq %n) ax `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000) (~(add ^rq %n) ax `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) + `@rq`0x3ffe.921f.b544.42d1.8469.898c.c517.01b8 `@rq`0x3f8b.cd12.9024.e088.a67c.c740.20bb.ea64 %.n == + ?: (~(lth ^rq %n) ax `@rq`0x4000.3800.0000.0000.0000.0000.0000.0000) + :* (~(div ^rq %n) (~(sub ^rq %n) ax `@rq`0x3fff.8000.0000.0000.0000.0000.0000.0000) (~(add ^rq %n) `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 (~(mul ^rq %n) `@rq`0x3fff.8000.0000.0000.0000.0000.0000.0000 ax))) + `@rq`0x3ffe.f730.bd28.1f69.b200.f10f.5e19.7794 `@rq`0xbf8b.ebe5.66c9.9ada.9f23.1bcc.ae27.916c %.n == + :* (~(div ^rq %n) `@rq`0xbfff.0000.0000.0000.0000.0000.0000.0000 ax) `@rq`0x3fff.921f.b544.42d1.8469.898c.c517.01b8 `@rq`0x3f8c.cd12.9024.e088.a67c.c740.20bb.ea64 %.n == + ++ ker + |= ax=@rq ^- @rq + =/ q (atred ax) + =/ z (~(mul ^rq %n) xr.q xr.q) + =/ s (~(mul ^rq %n) z (roll (flop at) |=([c=@rq a=@rq] (~(add ^rq %n) (~(mul ^rq %n) a z) c)))) + ?: dir.q (~(sub ^rq %n) xr.q (~(mul ^rq %n) xr.q s)) + (~(sub ^rq %n) hi.q (~(sub ^rq %n) (~(sub ^rq %n) (~(mul ^rq %n) xr.q s) lo.q) xr.q)) + -- :: +atan2: [@rq @rq] -> @rq :: :: Returns the inverse tangent of a floating-point coordinate. @@ -3157,17 +3715,13 @@ :: ++ atan2 |= [y=@rq x=@rq] ^- @rq - ?: (gth x .~~~0) - (atan (div y x)) - ?: &((lth x .~~~0) (gte y .~~~0)) - (add (atan (div y x)) pi) - ?: &((lth x .~~~0) (lth y .~~~0)) - (sub (atan (div y x)) pi) - ?: &(=(.~~~0 x) (gth y .~~~0)) - (div pi .~~~2) - ?: &(=(.~~~0 x) (lth y .~~~0)) - (mul .~~~-1 (div pi .~~~2)) - .~~~0 :: undefined + ?: (~(gth ^rq %n) x `@rq`0x0) (atan (~(div ^rq %n) y x)) + ?: &((~(lth ^rq %n) x `@rq`0x0) (~(gte ^rq %n) y `@rq`0x0)) (~(add ^rq %n) (atan (~(div ^rq %n) y x)) `@rq`0x4000.921f.b544.42d1.8469.898c.c517.01b8) + ?: &((~(lth ^rq %n) x `@rq`0x0) (~(lth ^rq %n) y `@rq`0x0)) (~(sub ^rq %n) (atan (~(div ^rq %n) y x)) `@rq`0x4000.921f.b544.42d1.8469.898c.c517.01b8) + ?: &(=(`@rq`0x0 x) (~(gth ^rq %n) y `@rq`0x0)) `@rq`0x3fff.921f.b544.42d1.8469.898c.c517.01b8 + ?: &(=(`@rq`0x0 x) (~(lth ^rq %n) y `@rq`0x0)) `@rq`0xbfff.921f.b544.42d1.8469.898c.c517.01b8 + `@rq`0x0 + :: +pow-n: [@rq @rq] -> @rq :: :: Returns the power of a floating-point atom to a signed integer exponent. @@ -3179,13 +3733,12 @@ :: Source ++ pow-n |= [x=@rq n=@rq] ^- @rq - ?: =(n .~~~0) .~~~1 - ?> &((gth n .~~~0) (is-int n)) - =/ p x + ?: =(n `@rq`0x0) `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 + =/ i (need (~(toi ^rq %n) n)) + =/ p `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000 |- ^- @rq - ?: (lth n .~~~2) - p - $(n (sub n .~~~1), p (mul p x)) + ?: =(i --0) p + $(i (dif:si i --1), p (~(mul ^rq %n) p x)) :: +log: @rq -> @rq :: :: Returns the natural logarithm of a floating-point atom. @@ -3200,25 +3753,48 @@ :: .~~~inf :: Source ++ log - |= z=@rq ^- @rq - :: filter out non-finite arguments - :: check infinities - ?: =(z 0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000 :: exp(+inf) -> inf - ?: =(z 0xffff.0000.0000.0000.0000.0000.0000.0000) .~~~0.0 :: exp(-inf) -> 0 - :: check NaN - ?. (^gte (dis 0x7fff.8000.0000.0000.0000.0000.0000.0000 z) 0) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 :: exp(NaN) -> NaN - :: otherwise, use Taylor series - =/ p .~~~0 - =/ po .~~~-1 - =/ i .~~~0 - |- ^- @rq - ?: (lth (abs (sub po p)) rtol) - (mul (div (mul .~~~2 (sub z .~~~1)) (add z .~~~1)) p) - =/ term1 (div .~~~1 (add .~~~1 (mul .~~~2 i))) - =/ term2 (mul (sub z .~~~1) (sub z .~~~1)) - =/ term3 (mul (add z .~~~1) (add z .~~~1)) - =/ term (mul term1 (pow-n (div term2 term3) i)) - $(i (add i .~~~1), p (add p term), po p) + :: x = 2^e * m reduction + atanh series (fdlibm f - s*(f-R)); f128. + :: Round-nearest-even internally (matches tools/rq_check.c). + |= x=@rq ^- @rq + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: =(x `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000 + ?: |(=(x `@rq`0x0) =(x `@rq`0x8000.0000.0000.0000.0000.0000.0000.0000)) `@rq`0xffff.0000.0000.0000.0000.0000.0000.0000 + ?: =(1 (rsh [0 127] x)) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + =/ sub =(0 (dis 0x7fff (rsh [0 112] x))) + =/ xx ?:(sub (~(mul ^rq %n) x `@rq`0x4077.0000.0000.0000.0000.0000.0000.0000) x) + =/ ae ?:(sub -120 --0) + =/ b `@`xx + =/ e (dif:si (new:si %.y (dis 0x7fff (rsh [0 112] b))) --16.383) + =/ m `@rq`(con (dis b 0xffff.ffff.ffff.ffff.ffff.ffff.ffff) 0x3fff.0000.0000.0000.0000.0000.0000.0000) + =/ big (~(gte ^rq %n) m `@rq`0x3fff.6a09.e667.f3bc.c908.b2fb.1366.ea95) + =? m big (~(mul ^rq %n) m `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000) + =? e big (sum:si e --1) + =. e (sum:si e ae) + =/ f (~(sub ^rq %n) m `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000) + =/ s (~(div ^rq %n) f (~(add ^rq %n) m `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) + =/ z (~(mul ^rq %n) s s) + =/ cs=(list @rq) + :~ `@rq`0x3ffd.5555.5555.5555.5555.5555.5555.5555 `@rq`0x3ffc.9999.9999.9999.9999.9999.9999.999a + `@rq`0x3ffc.2492.4924.9249.2492.4924.9249.2492 `@rq`0x3ffb.c71c.71c7.1c71.c71c.71c7.1c71.c71c + `@rq`0x3ffb.745d.1745.d174.5d17.45d1.745d.1746 `@rq`0x3ffb.3b13.b13b.13b1.3b13.b13b.13b1.3b14 + `@rq`0x3ffb.1111.1111.1111.1111.1111.1111.1111 `@rq`0x3ffa.e1e1.e1e1.e1e1.e1e1.e1e1.e1e1.e1e2 + `@rq`0x3ffa.af28.6bca.1af2.86bc.a1af.286b.ca1b `@rq`0x3ffa.8618.6186.1861.8618.6186.1861.8618 + `@rq`0x3ffa.642c.8590.b216.42c8.590b.2164.2c86 `@rq`0x3ffa.47ae.147a.e147.ae14.7ae1.47ae.147b + `@rq`0x3ffa.2f68.4bda.12f6.84bd.a12f.684b.da13 `@rq`0x3ffa.1a7b.9611.a7b9.611a.7b96.11a7.b961 + `@rq`0x3ffa.0842.1084.2108.4210.8421.0842.1084 `@rq`0x3ff9.f07c.1f07.c1f0.7c1f.07c1.f07c.1f08 + `@rq`0x3ff9.d41d.41d4.1d41.d41d.41d4.1d41.d41d `@rq`0x3ff9.bacf.914c.1bac.f914.c1ba.cf91.4c1c + `@rq`0x3ff9.a41a.41a4.1a41.a41a.41a4.1a41.a41a `@rq`0x3ff9.8f9c.18f9.c18f.9c18.f9c1.8f9c.18fa + `@rq`0x3ff9.7d05.f417.d05f.417d.05f4.17d0.5f41 `@rq`0x3ff9.6c16.c16c.16c1.6c16.c16c.16c1.6c17 + `@rq`0x3ff9.5c98.82b9.3105.7262.0ae4.c415.c988 + == + =/ p2 (roll (flop cs) |=([c=@rq acc=@rq] (~(add ^rq %n) (~(mul ^rq %n) acc z) c))) + =/ r (~(mul ^rq %n) (~(add ^rq %n) z z) p2) + =/ l1 (~(sub ^rq %n) f (~(mul ^rq %n) s (~(sub ^rq %n) f r))) + =/ efa (~(sun ^rq %n) (abs:si e)) + =/ ef ?:((syn:si e) efa (~(sub ^rq %n) `@rq`0x0 efa)) + =/ hi (~(mul ^rq %n) ef `@rq`0x3ffe.62e4.2fef.a39e.f357.93c8.0000.0000) + =/ lo (~(mul ^rq %n) ef `@rq`0xbfad.319f.f034.2542.fc32.f366.359d.274a) + (~(add ^rq %n) hi (~(add ^rq %n) l1 lo)) :: +log-10: @rq -> @rq :: :: Returns the base-10 logarithm of a floating-point atom. @@ -3226,17 +3802,62 @@ :: TODO :: Source ++ log-10 - |= z=@rq ^- @rq - (div (log z) log10) + |= x=@rq ^- @rq + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: =(x `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000 + ?: |(=(x `@rq`0x0) =(x `@rq`0x8000.0000.0000.0000.0000.0000.0000.0000)) `@rq`0xffff.0000.0000.0000.0000.0000.0000.0000 + ?: =(1 (rsh [0 127] x)) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + =/ el (lr x) + (~(add ^rq %n) (~(mul ^rq %n) ef.el `@rq`0x3ffd.3441.3509.f79f.ef31.1f12.b358.16f9) (~(mul ^rq %n) lm.el `@rq`0x3ffd.bcb7.b152.6e50.e32a.6ab7.555f.5a68)) :: +log-2: @rq -> @rq :: :: Returns the base-2 logarithm of a floating-point atom. :: Examples :: TODO :: Source + :: +lr: log reduction for finite positive @rq x -> [e (as @rq), log(mantissa)]. + ++ lr + |= x=@rq ^- [ef=@rq lm=@rq] + =/ sub =(0 (dis 0x7fff (rsh [0 112] x))) + =/ xx ?:(sub (~(mul ^rq %n) x `@rq`0x4077.0000.0000.0000.0000.0000.0000.0000) x) + =/ ae ?:(sub -120 --0) + =/ b `@`xx + =/ e (dif:si (new:si %.y (dis 0x7fff (rsh [0 112] b))) --16.383) + =/ m `@rq`(con (dis b 0xffff.ffff.ffff.ffff.ffff.ffff.ffff) 0x3fff.0000.0000.0000.0000.0000.0000.0000) + =/ big (~(gte ^rq %n) m `@rq`0x3fff.6a09.e667.f3bc.c908.b2fb.1366.ea95) + =? m big (~(mul ^rq %n) m `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000) + =? e big (sum:si e --1) + =. e (sum:si e ae) + =/ f (~(sub ^rq %n) m `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000) + =/ s (~(div ^rq %n) f (~(add ^rq %n) m `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) + =/ z (~(mul ^rq %n) s s) + =/ cs=(list @rq) + :~ `@rq`0x3ffd.5555.5555.5555.5555.5555.5555.5555 `@rq`0x3ffc.9999.9999.9999.9999.9999.9999.999a + `@rq`0x3ffc.2492.4924.9249.2492.4924.9249.2492 `@rq`0x3ffb.c71c.71c7.1c71.c71c.71c7.1c71.c71c + `@rq`0x3ffb.745d.1745.d174.5d17.45d1.745d.1746 `@rq`0x3ffb.3b13.b13b.13b1.3b13.b13b.13b1.3b14 + `@rq`0x3ffb.1111.1111.1111.1111.1111.1111.1111 `@rq`0x3ffa.e1e1.e1e1.e1e1.e1e1.e1e1.e1e1.e1e2 + `@rq`0x3ffa.af28.6bca.1af2.86bc.a1af.286b.ca1b `@rq`0x3ffa.8618.6186.1861.8618.6186.1861.8618 + `@rq`0x3ffa.642c.8590.b216.42c8.590b.2164.2c86 `@rq`0x3ffa.47ae.147a.e147.ae14.7ae1.47ae.147b + `@rq`0x3ffa.2f68.4bda.12f6.84bd.a12f.684b.da13 `@rq`0x3ffa.1a7b.9611.a7b9.611a.7b96.11a7.b961 + `@rq`0x3ffa.0842.1084.2108.4210.8421.0842.1084 `@rq`0x3ff9.f07c.1f07.c1f0.7c1f.07c1.f07c.1f08 + `@rq`0x3ff9.d41d.41d4.1d41.d41d.41d4.1d41.d41d `@rq`0x3ff9.bacf.914c.1bac.f914.c1ba.cf91.4c1c + `@rq`0x3ff9.a41a.41a4.1a41.a41a.41a4.1a41.a41a `@rq`0x3ff9.8f9c.18f9.c18f.9c18.f9c1.8f9c.18fa + `@rq`0x3ff9.7d05.f417.d05f.417d.05f4.17d0.5f41 `@rq`0x3ff9.6c16.c16c.16c1.6c16.c16c.16c1.6c17 + `@rq`0x3ff9.5c98.82b9.3105.7262.0ae4.c415.c988 + == + =/ p2 (roll (flop cs) |=([c=@rq acc=@rq] (~(add ^rq %n) (~(mul ^rq %n) acc z) c))) + =/ r (~(mul ^rq %n) (~(add ^rq %n) z z) p2) + =/ l1 (~(sub ^rq %n) f (~(mul ^rq %n) s (~(sub ^rq %n) f r))) + =/ efa (~(sun ^rq %n) (abs:si e)) + [?:((syn:si e) efa (~(sub ^rq %n) `@rq`0x0 efa)) l1] ++ log-2 - |= z=@rq ^- @rq - (div (log z) log2) + |= x=@rq ^- @rq + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: =(x `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000 + ?: |(=(x `@rq`0x0) =(x `@rq`0x8000.0000.0000.0000.0000.0000.0000.0000)) `@rq`0xffff.0000.0000.0000.0000.0000.0000.0000 + ?: =(1 (rsh [0 127] x)) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + =/ el (lr x) + (~(add ^rq %n) ef.el (~(mul ^rq %n) lm.el `@rq`0x3fff.7154.7652.b82f.e177.7d0f.fda0.d23a)) :: +pow: [@rq @rq] -> @rq :: :: Returns the power of a floating-point atom to a floating-point exponent. @@ -3250,9 +3871,9 @@ :: Source ++ pow |= [x=@rq n=@rq] ^- @rq - :: fall through on positive integers (faster) - ?: &(=(n (san (need (toi n)))) (gth n .~~~0)) (pow-n x (san (need (toi n)))) - (exp (mul n (log x))) + ?: &(=(n (~(san ^rq %n) (need (~(toi ^rq %n) n)))) (~(gth ^rq %n) n `@rq`0x0)) + (pow-n x n) + (exp (~(mul ^rq %n) n (log x))) :: +sqrt: @rq -> @rq :: :: Returns the square root of a floating-point atom. @@ -3278,15 +3899,17 @@ :: .~~~316.2277660168379331998893544432718 :: Source ++ sqt + :: correctly-rounded f128 sqrt: stdlib seed + one Markstein FMA (matches + :: the SoftFloat f128_sqrt jet, tools/rq_check.c). |= x=@rq ^- @rq - ?> (sgn x) - ?: =(.~~~0 x) .~~~0 - =/ g=@rq (div x .~~~2) - |- - =/ n=@rq (mul .~~~0.5 (add g (div x g))) - ?. (gth (abs (sub g n)) rtol) - n - $(g n) + ?: !(~(equ ^rq %n) x x) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + ?: =(x `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000) `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000 + ?: |(=(x `@rq`0x0) =(x `@rq`0x8000.0000.0000.0000.0000.0000.0000.0000)) x + ?: =(1 (rsh [0 127] x)) `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000 + =/ g (sqt:^rq x) + =/ h (~(div ^rq %n) `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000 g) + =/ r (~(fma ^rq %n) (~(sub ^rq %n) `@rq`0x0 g) g x) + (~(fma ^rq %n) h r g) :: +cbrt: @rq -> @rq :: :: Returns the cube root of a floating-point atom. @@ -3313,8 +3936,12 @@ :: Source ++ cbt |= x=@rq ^- @rq - ?> (sgn x) - (pow x .~~~0.3333) + ?: !(~(equ ^rq %n) x x) x + ?: |(=(x `@rq`0x0) =(x `@rq`0x8000.0000.0000.0000.0000.0000.0000.0000)) x + =/ ax `@rq`(dis x 0x7fff.ffff.ffff.ffff.ffff.ffff.ffff.ffff) + =/ r (exp (~(mul ^rq %n) (log ax) `@rq`0x3ffd.5555.5555.5555.5555.5555.5555.5555)) + ?:(=(1 (rsh [0 127] x)) (~(sub ^rq %n) `@rq`0x0 r) r) + :: +arg: @rq -> @rq :: :: Returns the argument of a floating-point atom (real argument = absolute diff --git a/libmath/desk/tests/lib/math-ainv.hoon b/libmath/desk/tests/lib/math-ainv.hoon new file mode 100644 index 0000000..f0685b3 --- /dev/null +++ b/libmath/desk/tests/lib/math-ainv.hoon @@ -0,0 +1,40 @@ +:::: /tests/lib/math-ainv -- bit-exact asin/acos (PR #18). +:: fdlibm rational P/Q kernel (shared), sqrt head/tail split on [0.5,1). +:: Expected bits from libmath/tools/cheb_check.py. +:: +/+ *test, math +|% +++ sad |=(x=@rd ^-(@ `@`(~(asin rd:math [%n .~1e-10]) x))) +++ cad |=(x=@rd ^-(@ `@`(~(acos rd:math [%n .~1e-10]) x))) +++ sas |=(x=@rs ^-(@ `@`(~(asin rs:math [%n .1e-5]) x))) +++ cas |=(x=@rs ^-(@ `@`(~(acos rs:math [%n .1e-5]) x))) +:: ==== @rd asin ==== +++ test-asin-0 (expect-eq !>(`@`0x0) !>((sad `@rd`0x0))) +++ test-asin-half (expect-eq !>(`@`0x3fe0.c152.382d.7366) !>((sad `@rd`0x3fe0.0000.0000.0000))) +++ test-asin-1 (expect-eq !>(`@`0x3ff9.21fb.5444.2d18) !>((sad `@rd`0x3ff0.0000.0000.0000))) +++ test-asin-n1 (expect-eq !>(`@`0xbff9.21fb.5444.2d18) !>((sad `@rd`0xbff0.0000.0000.0000))) +++ test-asin-75 (expect-eq !>(`@`0x3feb.2353.15c6.80dc) !>((sad `@rd`0x3fe8.0000.0000.0000))) +++ test-asin-9 (expect-eq !>(`@`0x3ff1.ea93.705f.a172) !>((sad `@rd`0x3fec.cccc.cccc.cccd))) +++ test-asin-99 (expect-eq !>(`@`0x3ff6.de3c.6f33.d51d) !>((sad `@rd`0x3fef.ae14.7ae1.47ae))) +++ test-asin-n6 (expect-eq !>(`@`0xbfe4.978f.a326.9ee1) !>((sad `@rd`0xbfe3.3333.3333.3333))) +++ test-asin-nan (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((sad `@rd`0x7ff8.0000.0000.0000))) +++ test-asin-big (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((sad `@rd`0x3ff8.0000.0000.0000))) +:: ==== @rd acos ==== +++ test-acos-0 (expect-eq !>(`@`0x3ff9.21fb.5444.2d18) !>((cad `@rd`0x0))) +++ test-acos-half (expect-eq !>(`@`0x3ff0.c152.382d.7366) !>((cad `@rd`0x3fe0.0000.0000.0000))) +++ test-acos-1 (expect-eq !>(`@`0x0) !>((cad `@rd`0x3ff0.0000.0000.0000))) +++ test-acos-n1 (expect-eq !>(`@`0x4009.21fb.5444.2d18) !>((cad `@rd`0xbff0.0000.0000.0000))) +++ test-acos-9 (expect-eq !>(`@`0x3fdc.dd9f.8f92.2e98) !>((cad `@rd`0x3fec.cccc.cccc.cccd))) +++ test-acos-n9 (expect-eq !>(`@`0x4005.8647.6251.e745) !>((cad `@rd`0xbfec.cccc.cccc.cccd))) +++ test-acos-nan (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((cad `@rd`0x7ff8.0000.0000.0000))) +:: ==== @rs ==== +++ test-asin-s-half (expect-eq !>(`@`0x3f06.0a92) !>((sas `@rs`0x3f00.0000))) +++ test-asin-s-1 (expect-eq !>(`@`0x3fc9.0fdb) !>((sas `@rs`0x3f80.0000))) +++ test-asin-s-75 (expect-eq !>(`@`0x3f59.1a99) !>((sas `@rs`0x3f40.0000))) +++ test-asin-s-9 (expect-eq !>(`@`0x3f8f.549b) !>((sas `@rs`0x3f66.6666))) +++ test-asin-s-nan (expect-eq !>(`@`0x7fc0.0000) !>((sas `@rs`0x7fc0.0000))) +++ test-acos-s-0 (expect-eq !>(`@`0x3fc9.0fdb) !>((cas `@rs`0x0))) +++ test-acos-s-1 (expect-eq !>(`@`0x0) !>((cas `@rs`0x3f80.0000))) +++ test-acos-s-n1 (expect-eq !>(`@`0x4049.0fdb) !>((cas `@rs`0xbf80.0000))) +++ test-acos-s-n6 (expect-eq !>(`@`0x400d.b70d) !>((cas `@rs`0xbf19.999a))) +-- diff --git a/libmath/desk/tests/lib/math-atan.hoon b/libmath/desk/tests/lib/math-atan.hoon new file mode 100644 index 0000000..c8d3ef5 --- /dev/null +++ b/libmath/desk/tests/lib/math-atan.hoon @@ -0,0 +1,35 @@ +:::: /tests/lib/math-atan -- bit-exact atan + atan2 (PR #18). +:: atan: fdlibm breakpoint reduction + minimax poly. atan2 derives from atan +:: with quadrant logic. Expected bits from libmath/tools/cheb_check.py. +:: +/+ *test, math +|% +++ ad |=(x=@rd ^-(@ `@`(~(atan rd:math [%n .~1e-10]) x))) +++ as |=(x=@rs ^-(@ `@`(~(atan rs:math [%n .1e-5]) x))) +++ a2d |=([y=@rd x=@rd] ^-(@ `@`(~(atan2 rd:math [%n .~1e-10]) y x))) +:: ==== @rd ==== +++ test-atan-0 (expect-eq !>(`@`0x0) !>((ad `@rd`0x0))) +++ test-atan-half (expect-eq !>(`@`0x3fdd.ac67.0561.bb4f) !>((ad `@rd`0x3fe0.0000.0000.0000))) +++ test-atan-1 (expect-eq !>(`@`0x3fe9.21fb.5444.2d18) !>((ad `@rd`0x3ff0.0000.0000.0000))) +++ test-atan-n1 (expect-eq !>(`@`0xbfe9.21fb.5444.2d18) !>((ad `@rd`0xbff0.0000.0000.0000))) +++ test-atan-1h (expect-eq !>(`@`0x3fef.730b.d281.f69b) !>((ad `@rd`0x3ff8.0000.0000.0000))) +++ test-atan-2 (expect-eq !>(`@`0x3ff1.b6e1.92eb.be44) !>((ad `@rd`0x4000.0000.0000.0000))) +++ test-atan-10 (expect-eq !>(`@`0x3ff7.89bd.2c16.0054) !>((ad `@rd`0x4024.0000.0000.0000))) +++ test-atan-tenth (expect-eq !>(`@`0x3fb9.83e2.82e2.cc4d) !>((ad `@rd`0x3fb9.9999.9999.999a))) +++ test-atan-n07 (expect-eq !>(`@`0xbfe3.8b11.2d7b.d4ad) !>((ad `@rd`0xbfe6.6666.6666.6666))) +++ test-atan-inf (expect-eq !>(`@`0x3ff9.21fb.5444.2d18) !>((ad `@rd`0x7ff0.0000.0000.0000))) +++ test-atan-ninf (expect-eq !>(`@`0xbff9.21fb.5444.2d18) !>((ad `@rd`0xfff0.0000.0000.0000))) +++ test-atan-nan (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((ad `@rd`0x7ff8.0000.0000.0000))) +++ test-atan-n0 (expect-eq !>(`@`0x8000.0000.0000.0000) !>((ad `@rd`0x8000.0000.0000.0000))) +:: ==== @rs ==== +++ test-atan-s-half (expect-eq !>(`@`0x3eed.6338) !>((as `@rs`0x3f00.0000))) +++ test-atan-s-1 (expect-eq !>(`@`0x3f49.0fdb) !>((as `@rs`0x3f80.0000))) +++ test-atan-s-n1 (expect-eq !>(`@`0xbf49.0fdb) !>((as `@rs`0xbf80.0000))) +++ test-atan-s-2 (expect-eq !>(`@`0x3f8d.b70d) !>((as `@rs`0x4000.0000))) +++ test-atan-s-10 (expect-eq !>(`@`0x3fbc.4de9) !>((as `@rs`0x4120.0000))) +++ test-atan-s-inf (expect-eq !>(`@`0x3fc9.0fdb) !>((as `@rs`0x7f80.0000))) +++ test-atan-s-nan (expect-eq !>(`@`0x7fc0.0000) !>((as `@rs`0x7fc0.0000))) +:: ==== atan2 (derives from atan): quadrant + axis sanity ==== +++ test-atan2-q1 (expect-eq !>(`@`0x3fe9.21fb.5444.2d18) !>((a2d `@rd`0x3ff0.0000.0000.0000 `@rd`0x3ff0.0000.0000.0000))) +++ test-atan2-yaxis (expect-eq !>(`@`0x3ff9.21fb.5444.2d18) !>((a2d `@rd`0x3ff0.0000.0000.0000 `@rd`0x0))) +-- diff --git a/libmath/desk/tests/lib/math-derived.hoon b/libmath/desk/tests/lib/math-derived.hoon new file mode 100644 index 0000000..9a5b605 --- /dev/null +++ b/libmath/desk/tests/lib/math-derived.hoon @@ -0,0 +1,30 @@ +:::: /tests/lib/math-derived -- log-2, log-10, pow, cbrt (PR #18). +:: These derive from the now-accurate exp/log: log-2 = log/ln2, log-10 = +:: log/ln10, pow = exp(n*log x), cbrt = sign*exp(log|x|/3). They are +:: deterministic (so bit-exact-jettable via the same composition) but only +:: faithful to a few ULP (pow especially -- a notoriously hard function). +:: Expected bits from libmath/tools/cheb_check.py composing exp/log. +:: +/+ *test, math +|% +++ l2d |=(x=@rd ^-(@ `@`(~(log-2 rd:math [%n .~1e-10]) x))) +++ l10d |=(x=@rd ^-(@ `@`(~(log-10 rd:math [%n .~1e-10]) x))) +++ pd |=([x=@rd n=@rd] ^-(@ `@`(~(pow rd:math [%n .~1e-10]) x n))) +++ cbd |=(x=@rd ^-(@ `@`(~(cbt rd:math [%n .~1e-10]) x))) +++ l2s |=(x=@rs ^-(@ `@`(~(log-2 rs:math [%n .1e-5]) x))) +++ l10s |=(x=@rs ^-(@ `@`(~(log-10 rs:math [%n .1e-5]) x))) +++ ps |=([x=@rs n=@rs] ^-(@ `@`(~(pow rs:math [%n .1e-5]) x n))) +++ cbs |=(x=@rs ^-(@ `@`(~(cbt rs:math [%n .1e-5]) x))) +:: ==== @rd ==== +++ test-log2-8 (expect-eq !>(`@`0x4008.0000.0000.0000) !>((l2d `@rd`0x4020.0000.0000.0000))) +++ test-log10-1e3 (expect-eq !>(`@`0x4008.0000.0000.0000) !>((l10d `@rd`0x408f.4000.0000.0000))) +++ test-pow-2-half (expect-eq !>(`@`0x3ff6.a09e.667f.3bcc) !>((pd `@rd`0x4000.0000.0000.0000 `@rd`0x3fe0.0000.0000.0000))) +++ test-pow-3-2h (expect-eq !>(`@`0x402f.2d4a.4563.563f) !>((pd `@rd`0x4008.0000.0000.0000 `@rd`0x4004.0000.0000.0000))) +++ test-cbrt-27 (expect-eq !>(`@`0x4007.ffff.ffff.ffff) !>((cbd `@rd`0x403b.0000.0000.0000))) +++ test-cbrt-n8 (expect-eq !>(`@`0xbfff.ffff.ffff.ffff) !>((cbd `@rd`0xc020.0000.0000.0000))) +:: ==== @rs ==== +++ test-log2-8-s (expect-eq !>(`@`0x4040.0000) !>((l2s `@rs`0x4100.0000))) +++ test-log10-1e3-s (expect-eq !>(`@`0x4040.0001) !>((l10s `@rs`0x447a.0000))) +++ test-pow-2-half-s (expect-eq !>(`@`0x3fb5.04f3) !>((ps `@rs`0x4000.0000 `@rs`0x3f00.0000))) +++ test-cbrt-n8-s (expect-eq !>(`@`0xc000.0000) !>((cbs `@rs`0xc100.0000))) +-- diff --git a/libmath/desk/tests/lib/math-exp.hoon b/libmath/desk/tests/lib/math-exp.hoon new file mode 100644 index 0000000..a4f03de --- /dev/null +++ b/libmath/desk/tests/lib/math-exp.hoon @@ -0,0 +1,49 @@ +:::: /tests/lib/math-exp -- bit-exact exp for the Chebyshev rewrite (PR #18). +:: Expected bit patterns are produced by libmath/tools/cheb_check.py, whose +:: strict-f64/f32 reference is bit-identical to SoftFloat for the algorithm's +:: primitives (+ - * /, round-to-int, ldexp). Covers the normal range plus +:: the NaN / +-inf / overflow / underflow / subnormal tails. +:: +/+ *test, math +|% +++ es |=(x=@rs ^-(@ `@`(~(exp rs:math [%n .1e-5]) x))) +++ ed |=(x=@rd ^-(@ `@`(~(exp rd:math [%n .~1e-10]) x))) +:: ==== @rd core ==== +++ test-rd-0 (expect-eq !>(`@`0x3ff0.0000.0000.0000) !>((ed `@rd`0x0))) +++ test-rd-half (expect-eq !>(`@`0x3ffa.6129.8e1e.069c) !>((ed `@rd`0x3fe0.0000.0000.0000))) +++ test-rd-1 (expect-eq !>(`@`0x4005.bf0a.8b14.576a) !>((ed `@rd`0x3ff0.0000.0000.0000))) +++ test-rd-n1 (expect-eq !>(`@`0x3fd7.8b56.362c.ef38) !>((ed `@rd`0xbff0.0000.0000.0000))) +++ test-rd-2 (expect-eq !>(`@`0x401d.8e64.b8d4.ddae) !>((ed `@rd`0x4000.0000.0000.0000))) +++ test-rd-10 (expect-eq !>(`@`0x40d5.829d.cf95.0560) !>((ed `@rd`0x4024.0000.0000.0000))) +++ test-rd-n5 (expect-eq !>(`@`0x3f7b.993f.e00d.5376) !>((ed `@rd`0xc014.0000.0000.0000))) +++ test-rd-tenth (expect-eq !>(`@`0x3ff1.aec7.b35a.00d4) !>((ed `@rd`0x3fb9.9999.9999.999a))) +:: ==== @rd edges ==== +++ test-rd-pinf (expect-eq !>(`@`0x7ff0.0000.0000.0000) !>((ed `@rd`0x7ff0.0000.0000.0000))) +++ test-rd-ninf (expect-eq !>(`@`0x0) !>((ed `@rd`0xfff0.0000.0000.0000))) +++ test-rd-nan (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((ed `@rd`0x7ff8.0000.0000.0000))) +++ test-rd-big (expect-eq !>(`@`0x7fe8.1e9b.4b52.d0c9) !>((ed `@rd`0x4086.2c00.0000.0000))) +++ test-rd-ovf (expect-eq !>(`@`0x7ff0.0000.0000.0000) !>((ed `@rd`0x4086.3000.0000.0000))) +++ test-rd-ovf2 (expect-eq !>(`@`0x7ff0.0000.0000.0000) !>((ed `@rd`0x4086.8000.0000.0000))) +++ test-rd-sub (expect-eq !>(`@`0x2) !>((ed `@rd`0xc087.4000.0000.0000))) +++ test-rd-udf (expect-eq !>(`@`0x0) !>((ed `@rd`0xc087.4999.9999.999a))) +++ test-rd-udf2 (expect-eq !>(`@`0x0) !>((ed `@rd`0xc087.7000.0000.0000))) +:: ==== @rs core ==== +++ test-rs-0 (expect-eq !>(`@`0x3f80.0000) !>((es `@rs`0x0))) +++ test-rs-half (expect-eq !>(`@`0x3fd3.094d) !>((es `@rs`0x3f00.0000))) +++ test-rs-1 (expect-eq !>(`@`0x402d.f854) !>((es `@rs`0x3f80.0000))) +++ test-rs-n1 (expect-eq !>(`@`0x3ebc.5ab2) !>((es `@rs`0xbf80.0000))) +++ test-rs-2 (expect-eq !>(`@`0x40ec.7326) !>((es `@rs`0x4000.0000))) +++ test-rs-10 (expect-eq !>(`@`0x46ac.14ee) !>((es `@rs`0x4120.0000))) +++ test-rs-n5 (expect-eq !>(`@`0x3bdc.c9ff) !>((es `@rs`0xc0a0.0000))) +++ test-rs-tenth (expect-eq !>(`@`0x3f8d.763e) !>((es `@rs`0x3dcc.cccd))) +:: ==== @rs edges ==== +++ test-rs-pinf (expect-eq !>(`@`0x7f80.0000) !>((es `@rs`0x7f80.0000))) +++ test-rs-ninf (expect-eq !>(`@`0x0) !>((es `@rs`0xff80.0000))) +++ test-rs-nan (expect-eq !>(`@`0x7fc0.0000) !>((es `@rs`0x7fc0.0000))) +++ test-rs-big (expect-eq !>(`@`0x7ef8.82b7) !>((es `@rs`0x42b0.0000))) +++ test-rs-ovf (expect-eq !>(`@`0x7f80.0000) !>((es `@rs`0x42b2.0000))) +++ test-rs-ovf2 (expect-eq !>(`@`0x7f80.0000) !>((es `@rs`0x42c8.0000))) +++ test-rs-sub (expect-eq !>(`@`0x1) !>((es `@rs`0xc2ce.0000))) +++ test-rs-udf (expect-eq !>(`@`0x0) !>((es `@rs`0xc2d0.0000))) +++ test-rs-udf2 (expect-eq !>(`@`0x0) !>((es `@rs`0xc2dc.0000))) +-- diff --git a/libmath/desk/tests/lib/math-log.hoon b/libmath/desk/tests/lib/math-log.hoon new file mode 100644 index 0000000..ddfe349 --- /dev/null +++ b/libmath/desk/tests/lib/math-log.hoon @@ -0,0 +1,38 @@ +:::: /tests/lib/math-log -- bit-exact log for the Chebyshev rewrite (PR #18). +:: Expected bit patterns from libmath/tools/cheb_check.py (strict f64/f32 ref, +:: bit-identical to SoftFloat for the primitives used). Covers the normal +:: range plus NaN / +inf / -inf / +-0 / negative / subnormal inputs. +:: +/+ *test, math +|% +++ ls |=(x=@rs ^-(@ `@`(~(log rs:math [%n .1e-5]) x))) +++ ld |=(x=@rd ^-(@ `@`(~(log rd:math [%n .~1e-10]) x))) +:: ==== @rd ==== +++ test-rd-1 (expect-eq !>(`@`0x0) !>((ld `@rd`0x3ff0.0000.0000.0000))) +++ test-rd-2 (expect-eq !>(`@`0x3fe6.2e42.fefa.39ef) !>((ld `@rd`0x4000.0000.0000.0000))) +++ test-rd-half (expect-eq !>(`@`0xbfe6.2e42.fefa.39ef) !>((ld `@rd`0x3fe0.0000.0000.0000))) +++ test-rd-10 (expect-eq !>(`@`0x4002.6bb1.bbb5.5516) !>((ld `@rd`0x4024.0000.0000.0000))) +++ test-rd-100 (expect-eq !>(`@`0x4012.6bb1.bbb5.5516) !>((ld `@rd`0x4059.0000.0000.0000))) +++ test-rd-tenth (expect-eq !>(`@`0xc002.6bb1.bbb5.5515) !>((ld `@rd`0x3fb9.9999.9999.999a))) +++ test-rd-e2 (expect-eq !>(`@`0x4000.0000.0000.0000) !>((ld `@rd`0x401d.8e64.b8d4.ddae))) +++ test-rd-sub (expect-eq !>(`@`0xc086.4e69.394d.9508) !>((ld `@rd`0x1268.8b70.e62b))) +++ test-rd-pinf (expect-eq !>(`@`0x7ff0.0000.0000.0000) !>((ld `@rd`0x7ff0.0000.0000.0000))) +++ test-rd-ninf (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((ld `@rd`0xfff0.0000.0000.0000))) +++ test-rd-nan (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((ld `@rd`0x7ff8.0000.0000.0000))) +++ test-rd-zero (expect-eq !>(`@`0xfff0.0000.0000.0000) !>((ld `@rd`0x0))) +++ test-rd-neg (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((ld `@rd`0xbff0.0000.0000.0000))) +:: ==== @rs ==== +++ test-rs-1 (expect-eq !>(`@`0x0) !>((ls `@rs`0x3f80.0000))) +++ test-rs-2 (expect-eq !>(`@`0x3f31.7218) !>((ls `@rs`0x4000.0000))) +++ test-rs-half (expect-eq !>(`@`0xbf31.7218) !>((ls `@rs`0x3f00.0000))) +++ test-rs-10 (expect-eq !>(`@`0x4013.5d8e) !>((ls `@rs`0x4120.0000))) +++ test-rs-100 (expect-eq !>(`@`0x4093.5d8e) !>((ls `@rs`0x42c8.0000))) +++ test-rs-tenth (expect-eq !>(`@`0xc013.5d8e) !>((ls `@rs`0x3dcc.cccd))) +++ test-rs-e2 (expect-eq !>(`@`0x4000.0000) !>((ls `@rs`0x40ec.7326))) +++ test-rs-sub (expect-eq !>(`@`0xc2b8.34f2) !>((ls `@rs`0x1.16c2))) +++ test-rs-pinf (expect-eq !>(`@`0x7f80.0000) !>((ls `@rs`0x7f80.0000))) +++ test-rs-ninf (expect-eq !>(`@`0x7fc0.0000) !>((ls `@rs`0xff80.0000))) +++ test-rs-nan (expect-eq !>(`@`0x7fc0.0000) !>((ls `@rs`0x7fc0.0000))) +++ test-rs-zero (expect-eq !>(`@`0xff80.0000) !>((ls `@rs`0x0))) +++ test-rs-neg (expect-eq !>(`@`0x7fc0.0000) !>((ls `@rs`0xbf80.0000))) +-- diff --git a/libmath/desk/tests/lib/math-rh.hoon b/libmath/desk/tests/lib/math-rh.hoon new file mode 100644 index 0000000..85632fa --- /dev/null +++ b/libmath/desk/tests/lib/math-rh.hoon @@ -0,0 +1,41 @@ +:::: /tests/lib/math-rh -- @rh (f16) transcendentals via the @rs door (PR #18). +:: Each @rh function widens to f32, runs the (faithful) @rs kernel, and rounds +:: the result to f16 -- correctly-rounded, since an f32 result is ~2^13 finer +:: than an f16 ULP. Expected = numpy float16 of the true value. +:: +/+ *test, math +|% +++ eh |=(x=@rh ^-(@ `@`(~(exp rh:math [%n .~~1e-2]) x))) +++ lh |=(x=@rh ^-(@ `@`(~(log rh:math [%n .~~1e-2]) x))) +++ sh |=(x=@rh ^-(@ `@`(~(sin rh:math [%n .~~1e-2]) x))) +++ ch |=(x=@rh ^-(@ `@`(~(cos rh:math [%n .~~1e-2]) x))) +++ th |=(x=@rh ^-(@ `@`(~(tan rh:math [%n .~~1e-2]) x))) +++ ath |=(x=@rh ^-(@ `@`(~(atan rh:math [%n .~~1e-2]) x))) +++ ash |=(x=@rh ^-(@ `@`(~(asin rh:math [%n .~~1e-2]) x))) +++ ach |=(x=@rh ^-(@ `@`(~(acos rh:math [%n .~~1e-2]) x))) +++ qh |=(x=@rh ^-(@ `@`(~(sqt rh:math [%n .~~1e-2]) x))) +++ cbh |=(x=@rh ^-(@ `@`(~(cbt rh:math [%n .~~1e-2]) x))) +++ l2h |=(x=@rh ^-(@ `@`(~(log-2 rh:math [%n .~~1e-2]) x))) +++ l10h |=(x=@rh ^-(@ `@`(~(log-10 rh:math [%n .~~1e-2]) x))) +++ ph |=([x=@rh n=@rh] ^-(@ `@`(~(pow rh:math [%n .~~1e-2]) x n))) +++ test-exp-half (expect-eq !>(`@`0x3e98) !>((eh `@rh`0x3800))) +++ test-exp-1 (expect-eq !>(`@`0x4170) !>((eh `@rh`0x3c00))) +++ test-exp-n2 (expect-eq !>(`@`0x3055) !>((eh `@rh`0xc000))) +++ test-exp-inf (expect-eq !>(`@`0x7c00) !>((eh `@rh`0x7c00))) +++ test-log-2 (expect-eq !>(`@`0x398c) !>((lh `@rh`0x4000))) +++ test-log-half (expect-eq !>(`@`0xb98c) !>((lh `@rh`0x3800))) +++ test-sin-1 (expect-eq !>(`@`0x3abb) !>((sh `@rh`0x3c00))) +++ test-sin-pi (expect-eq !>(`@`0x13ed) !>((sh `@rh`0x4248))) +++ test-cos-1 (expect-eq !>(`@`0x3853) !>((ch `@rh`0x3c00))) +++ test-tan-1 (expect-eq !>(`@`0x3e3b) !>((th `@rh`0x3c00))) +++ test-atan-1 (expect-eq !>(`@`0x3a48) !>((ath `@rh`0x3c00))) +++ test-atan-2 (expect-eq !>(`@`0x3c6e) !>((ath `@rh`0x4000))) +++ test-asin-half (expect-eq !>(`@`0x3830) !>((ash `@rh`0x3800))) +++ test-acos-half (expect-eq !>(`@`0x3c30) !>((ach `@rh`0x3800))) +++ test-sqt-2 (expect-eq !>(`@`0x3da8) !>((qh `@rh`0x4000))) +++ test-sqt-10 (expect-eq !>(`@`0x4253) !>((qh `@rh`0x4900))) +++ test-cbt-8 (expect-eq !>(`@`0x4000) !>((cbh `@rh`0x4800))) +++ test-log2-8 (expect-eq !>(`@`0x4200) !>((l2h `@rh`0x4800))) +++ test-log10-1k (expect-eq !>(`@`0x4205) !>((l10h `@rh`0x6400))) +++ test-pow-2-h (expect-eq !>(`@`0x3da8) !>((ph `@rh`0x4000 `@rh`0x3800))) +-- diff --git a/libmath/desk/tests/lib/math-rq.hoon b/libmath/desk/tests/lib/math-rq.hoon new file mode 100644 index 0000000..cdbdfeb --- /dev/null +++ b/libmath/desk/tests/lib/math-rq.hoon @@ -0,0 +1,111 @@ +:::: /tests/lib/math-rq -- @rq (f128) transcendentals (PR #18). +:: Faithful native f128 (Cody-Waite/fdlibm at degree ~24). Expected bits are +:: the SoftFloat-f128 output of libmath/tools/rq_check.c (the jet basis), +:: cross-checked against MPFR truth. +:: +/+ *test, math +|% +++ eq |=(x=@rq ^-(@ `@`(~(exp rq:math [%n .~~~1e-10]) x))) +:: ==== exp ==== +++ test-exp-half %+ expect-eq !>(`@`0x3fff.a612.98e1.e069.bc97.2dfe.fab6.df34) + !>((eq `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000)) +++ test-exp-1 %+ expect-eq !>(`@`0x4000.5bf0.a8b1.4576.9535.5fb8.ac40.4e7a) + !>((eq `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) +++ test-exp-2 %+ expect-eq !>(`@`0x4001.d8e6.4b8d.4dda.dcc3.3a3b.a206.b68b) + !>((eq `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000)) +++ test-exp-n2 %+ expect-eq !>(`@`0x3ffc.152a.aa3b.f81c.b9fd.b76e.ae12.d029) + !>((eq `@rq`0xc000.0000.0000.0000.0000.0000.0000.0000)) +++ test-exp-10 %+ expect-eq !>(`@`0x400d.5829.dcf9.5055.f9f0.7ea8.c056.d135) + !>((eq `@rq`0x4002.4000.0000.0000.0000.0000.0000.0000)) +++ test-exp-0 %+ expect-eq !>(`@`0x3fff.0000.0000.0000.0000.0000.0000.0000) + !>((eq `@rq`0x0)) +++ test-exp-inf %+ expect-eq !>(`@`0x7fff.0000.0000.0000.0000.0000.0000.0000) + !>((eq `@rq`0x7fff.0000.0000.0000.0000.0000.0000.0000)) +++ test-exp-ninf %+ expect-eq !>(`@`0x0) + !>((eq `@rq`0xffff.0000.0000.0000.0000.0000.0000.0000)) +++ test-exp-nan %+ expect-eq !>(`@`0x7fff.8000.0000.0000.0000.0000.0000.0000) + !>((eq `@rq`0x7fff.8000.0000.0000.0000.0000.0000.0000)) +++ lq |=(x=@rq ^-(@ `@`(~(log rq:math [%n .~~~1e-10]) x))) +++ test-log-2 %+ expect-eq !>(`@`0x3ffe.62e4.2fef.a39e.f357.93c7.6730.07e6) + !>((lq `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000)) +++ test-log-10 %+ expect-eq !>(`@`0x4000.26bb.1bbb.5551.582d.d4ad.ac57.05a6) + !>((lq `@rq`0x4002.4000.0000.0000.0000.0000.0000.0000)) +++ test-log-half %+ expect-eq !>(`@`0xbffe.62e4.2fef.a39e.f357.93c7.6730.07e6) + !>((lq `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000)) +++ test-log-100 %+ expect-eq !>(`@`0x4001.26bb.1bbb.5551.582d.d4ad.ac57.05a6) + !>((lq `@rq`0x4005.9000.0000.0000.0000.0000.0000.0000)) +++ sq |=(x=@rq ^-(@ `@`(~(sin rq:math [%n .~~~1e-10]) x))) +++ cq |=(x=@rq ^-(@ `@`(~(cos rq:math [%n .~~~1e-10]) x))) +++ qq |=(x=@rq ^-(@ `@`(~(sqt rq:math [%n .~~~1e-10]) x))) +++ test-sin-1 %+ expect-eq !>(`@`0x3ffe.aed5.48f0.90ce.e041.8dd3.d213.8a1e) + !>((sq `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) +++ test-sin-2 %+ expect-eq !>(`@`0x3ffe.d18f.6ead.1b44.5dfa.b848.1880.09ca) + !>((sq `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000)) +++ test-sin-half %+ expect-eq !>(`@`0x3ffd.eaee.8744.b05e.fe87.64bc.364f.d838) + !>((sq `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000)) +++ test-cos-1 %+ expect-eq !>(`@`0x3ffe.14a2.80fb.5068.b923.848c.db2e.d0e4) + !>((cq `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) +++ test-cos-2 %+ expect-eq !>(`@`0xbffd.aa22.6575.3720.4a43.32f8.acbb.72b1) + !>((cq `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000)) +++ test-sqt-2 %+ expect-eq !>(`@`0x3fff.6a09.e667.f3bc.c908.b2fb.1366.ea95) + !>((qq `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000)) +++ test-sqt-10 %+ expect-eq !>(`@`0x4000.94c5.83ad.a5b5.2920.4a2b.c830.cd9c) + !>((qq `@rq`0x4002.4000.0000.0000.0000.0000.0000.0000)) +++ test-sqt-4 %+ expect-eq !>(`@`0x4000.0000.0000.0000.0000.0000.0000.0000) + !>((qq `@rq`0x4001.0000.0000.0000.0000.0000.0000.0000)) +++ l2 |=(x=@rq ^-(@ `@`(~(log-2 rq:math [%n .~~~1e-10]) x))) +++ lt |=(x=@rq ^-(@ `@`(~(log-10 rq:math [%n .~~~1e-10]) x))) +++ pw |=([x=@rq n=@rq] ^-(@ `@`(~(pow rq:math [%n .~~~1e-10]) x n))) +++ cb |=(x=@rq ^-(@ `@`(~(cbt rq:math [%n .~~~1e-10]) x))) +++ test-log2-8 %+ expect-eq !>(`@`0x4000.8000.0000.0000.0000.0000.0000.0000) + !>((l2 `@rq`0x4002.0000.0000.0000.0000.0000.0000.0000)) +++ test-log2-1024 %+ expect-eq !>(`@`0x4002.4000.0000.0000.0000.0000.0000.0000) + !>((l2 `@rq`0x4009.0000.0000.0000.0000.0000.0000.0000)) +++ test-log10-1k %+ expect-eq !>(`@`0x4000.8000.0000.0000.0000.0000.0000.0000) + !>((lt `@rq`0x4008.f400.0000.0000.0000.0000.0000.0000)) +++ test-log10-100 %+ expect-eq !>(`@`0x4000.0000.0000.0000.0000.0000.0000.0000) + !>((lt `@rq`0x4005.9000.0000.0000.0000.0000.0000.0000)) +++ test-pow-2-10 %+ expect-eq !>(`@`0x4009.0000.0000.0000.0000.0000.0000.0000) + !>((pw `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 `@rq`0x4002.4000.0000.0000.0000.0000.0000.0000)) +++ test-pow-3-2 %+ expect-eq !>(`@`0x4002.2000.0000.0000.0000.0000.0000.0000) + !>((pw `@rq`0x4000.8000.0000.0000.0000.0000.0000.0000 `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000)) +++ test-pow-2-h %+ expect-eq !>(`@`0x3fff.6a09.e667.f3bc.c908.b2fb.1366.ea96) + !>((pw `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000 `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000)) +++ test-cbrt-8 %+ expect-eq !>(`@`0x3fff.ffff.ffff.ffff.ffff.ffff.ffff.ffff) + !>((cb `@rq`0x4002.0000.0000.0000.0000.0000.0000.0000)) +++ test-cbrt-n27 %+ expect-eq !>(`@`0xc000.7fff.ffff.ffff.ffff.ffff.ffff.ffff) + !>((cb `@rq`0xc003.b000.0000.0000.0000.0000.0000.0000)) +++ at |=(x=@rq ^-(@ `@`(~(atan rq:math [%n .~~~1e-10]) x))) +++ test-atan-half %+ expect-eq !>(`@`0x3ffd.dac6.7056.1bb4.f68a.dfc8.8bd9.7875) + !>((at `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000)) +++ test-atan-1 %+ expect-eq !>(`@`0x3ffe.921f.b544.42d1.8469.898c.c517.01b8) + !>((at `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) +++ test-atan-2 %+ expect-eq !>(`@`0x3fff.1b6e.192e.bbe4.46c6.d19a.a220.a39b) + !>((at `@rq`0x4000.0000.0000.0000.0000.0000.0000.0000)) +++ test-atan-n1 %+ expect-eq !>(`@`0xbffe.921f.b544.42d1.8469.898c.c517.01b8) + !>((at `@rq`0xbfff.0000.0000.0000.0000.0000.0000.0000)) +++ test-atan-10 %+ expect-eq !>(`@`0x3fff.789b.d2c1.6005.382e.abf0.cd4b.6aae) + !>((at `@rq`0x4002.4000.0000.0000.0000.0000.0000.0000)) +++ test-atan-tenth %+ expect-eq !>(`@`0x3ffb.983e.282e.2cc4.c3ad.d9bf.7cb9.709f) + !>((at `@rq`0x3ffb.9999.9999.9999.9999.9999.9999.999a)) +++ as |=(x=@rq ^-(@ `@`(~(asin rq:math [%n .~~~1e-10]) x))) +++ ac |=(x=@rq ^-(@ `@`(~(acos rq:math [%n .~~~1e-10]) x))) +++ test-asin-half %+ expect-eq !>(`@`0x3ffe.0c15.2382.d736.5846.5bb3.2e0f.567b) + !>((as `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000)) +++ test-asin-75 %+ expect-eq !>(`@`0x3ffe.b235.315c.680d.c081.583d.b360.d5e2) + !>((as `@rq`0x3ffe.8000.0000.0000.0000.0000.0000.0000)) +++ test-asin-n6 %+ expect-eq !>(`@`0xbffe.4978.fa32.69ee.1248.3350.fe54.8afb) + !>((as `@rq`0xbffe.3333.3333.3333.3333.3333.3333.3333)) +++ test-asin-1 %+ expect-eq !>(`@`0x3fff.921f.b544.42d1.8469.898c.c517.01b8) + !>((as `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) +++ test-acos-half %+ expect-eq !>(`@`0x3fff.0c15.2382.d736.5846.5bb3.2e0f.567b) + !>((ac `@rq`0x3ffe.0000.0000.0000.0000.0000.0000.0000)) +++ test-acos-75 %+ expect-eq !>(`@`0x3ffe.720a.392c.1d95.4851.badb.d6cd.2d8e) + !>((ac `@rq`0x3ffe.8000.0000.0000.0000.0000.0000.0000)) +++ test-acos-n6 %+ expect-eq !>(`@`0x4000.1b6e.192e.bbe4.46c6.d19a.a220.a39b) + !>((ac `@rq`0xbffe.3333.3333.3333.3333.3333.3333.3333)) +++ test-acos-1 %+ expect-eq !>(`@`0x0) + !>((ac `@rq`0x3fff.0000.0000.0000.0000.0000.0000.0000)) +++ test-acos-n1 %+ expect-eq !>(`@`0x4000.921f.b544.42d1.8469.898c.c517.01b8) + !>((ac `@rq`0xbfff.0000.0000.0000.0000.0000.0000.0000)) +-- diff --git a/libmath/desk/tests/lib/math-sqrt.hoon b/libmath/desk/tests/lib/math-sqrt.hoon new file mode 100644 index 0000000..2004847 --- /dev/null +++ b/libmath/desk/tests/lib/math-sqrt.hoon @@ -0,0 +1,30 @@ +:::: /tests/lib/math-sqrt -- correctly-rounded sqrt (PR #18). +:: @rs delegates to the SoftFloat f32 root; @rd seeds with the (faithful) +:: stdlib f64 root and applies one Markstein FMA correction. Expected values +:: are the correctly-rounded roots (mpmath). +:: +/+ *test, math +|% +++ qd |=(x=@rd ^-(@ `@`(~(sqt rd:math [%n .~1e-10]) x))) +++ qs |=(x=@rs ^-(@ `@`(~(sqt rs:math [%n .1e-5]) x))) +:: ==== @rd ==== +++ test-rd-2 (expect-eq !>(`@`0x3ff6.a09e.667f.3bcd) !>((qd `@rd`0x4000.0000.0000.0000))) +++ test-rd-half (expect-eq !>(`@`0x3fe6.a09e.667f.3bcd) !>((qd `@rd`0x3fe0.0000.0000.0000))) +++ test-rd-4 (expect-eq !>(`@`0x4000.0000.0000.0000) !>((qd `@rd`0x4010.0000.0000.0000))) +++ test-rd-1e5 (expect-eq !>(`@`0x4073.c3a4.edfa.9759) !>((qd `@rd`0x40f8.6a00.0000.0000))) +++ test-rd-tenth (expect-eq !>(`@`0x3fd4.3d13.6248.490f) !>((qd `@rd`0x3fb9.9999.9999.999a))) +++ test-rd-sub (expect-eq !>(`@`0x20ca.2fe7.6a3f.9475) !>((qd `@rd`0x1a5.6e1f.c2f8.f359))) +++ test-rd-9 (expect-eq !>(`@`0x4008.0000.0000.0000) !>((qd `@rd`0x4022.0000.0000.0000))) +++ test-rd-0 (expect-eq !>(`@`0x0) !>((qd `@rd`0x0))) +++ test-rd-n0 (expect-eq !>(`@`0x8000.0000.0000.0000) !>((qd `@rd`0x8000.0000.0000.0000))) +++ test-rd-neg (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((qd `@rd`0xbff0.0000.0000.0000))) +++ test-rd-inf (expect-eq !>(`@`0x7ff0.0000.0000.0000) !>((qd `@rd`0x7ff0.0000.0000.0000))) +++ test-rd-nan (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((qd `@rd`0x7ff8.0000.0000.0000))) +:: ==== @rs ==== +++ test-rs-2 (expect-eq !>(`@`0x3fb5.04f3) !>((qs `@rs`0x4000.0000))) +++ test-rs-half (expect-eq !>(`@`0x3f35.04f3) !>((qs `@rs`0x3f00.0000))) +++ test-rs-4 (expect-eq !>(`@`0x4000.0000) !>((qs `@rs`0x4080.0000))) +++ test-rs-1e5 (expect-eq !>(`@`0x439e.1d27) !>((qs `@rs`0x47c3.5000))) +++ test-rs-tenth (expect-eq !>(`@`0x3ea1.e89b) !>((qs `@rs`0x3dcc.cccd))) +++ test-rs-9 (expect-eq !>(`@`0x4040.0000) !>((qs `@rs`0x4110.0000))) +-- diff --git a/libmath/desk/tests/lib/math-tan.hoon b/libmath/desk/tests/lib/math-tan.hoon new file mode 100644 index 0000000..fb4ffc0 --- /dev/null +++ b/libmath/desk/tests/lib/math-tan.hoon @@ -0,0 +1,20 @@ +:::: /tests/lib/math-tan -- bit-exact @rd tan (dedicated fdlibm kernel, PR #18). +:: @rd tan now uses __kernel_tan (faithful <=1 ULP); @rs stays as the sin/cos +:: ratio (~1.2 ULP -- a pure-f32 tan kernel is worse near the poles). +:: Expected bits from libmath/tools/cheb_check.py. +:: +/+ *test, math +|% +++ td |=(x=@rd ^-(@ `@`(~(tan rd:math [%n .~1e-10]) x))) +++ test-tan-0 (expect-eq !>(`@`0x0) !>((td `@rd`0x0))) +++ test-tan-half (expect-eq !>(`@`0x3fe1.7b4f.5bf3.474a) !>((td `@rd`0x3fe0.0000.0000.0000))) +++ test-tan-1 (expect-eq !>(`@`0x3ff8.eb24.5cbe.e3a6) !>((td `@rd`0x3ff0.0000.0000.0000))) +++ test-tan-n1 (expect-eq !>(`@`0xbff8.eb24.5cbe.e3a6) !>((td `@rd`0xbff0.0000.0000.0000))) +++ test-tan-pio4 (expect-eq !>(`@`0x3fef.ffff.ffff.ffff) !>((td `@rd`0x3fe9.21fb.5444.2d18))) +++ test-tan-2 (expect-eq !>(`@`0xc001.7af6.2e09.50f8) !>((td `@rd`0x4000.0000.0000.0000))) +++ test-tan-10 (expect-eq !>(`@`0x3fe4.bf5f.34be.3782) !>((td `@rd`0x4024.0000.0000.0000))) +++ test-tan-100 (expect-eq !>(`@`0xbfe2.ca74.d62b.5d38) !>((td `@rd`0x4059.0000.0000.0000))) +++ test-tan-inf (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((td `@rd`0x7ff0.0000.0000.0000))) +++ test-tan-nan (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((td `@rd`0x7ff8.0000.0000.0000))) +++ test-tan-n0 (expect-eq !>(`@`0x8000.0000.0000.0000) !>((td `@rd`0x8000.0000.0000.0000))) +-- diff --git a/libmath/desk/tests/lib/math-trig.hoon b/libmath/desk/tests/lib/math-trig.hoon new file mode 100644 index 0000000..fae686c --- /dev/null +++ b/libmath/desk/tests/lib/math-trig.hoon @@ -0,0 +1,46 @@ +:::: /tests/lib/math-trig -- bit-exact sin/cos for the Chebyshev rewrite (#18). +:: Expected bit patterns from libmath/tools/cheb_check.py. Covers the normal +:: range, large arguments, and the NaN / +-inf / +-0 edges. +:: +/+ *test, math +|% +++ sd |=(x=@rd ^-(@ `@`(~(sin rd:math [%n .~1e-10]) x))) +++ cd |=(x=@rd ^-(@ `@`(~(cos rd:math [%n .~1e-10]) x))) +++ ss |=(x=@rs ^-(@ `@`(~(sin rs:math [%n .1e-5]) x))) +++ cs |=(x=@rs ^-(@ `@`(~(cos rs:math [%n .1e-5]) x))) +:: ==== @rd ==== +++ test-sin-0 (expect-eq !>(`@`0x0) !>((sd `@rd`0x0))) +++ test-sin-half (expect-eq !>(`@`0x3fde.aee8.744b.05f0) !>((sd `@rd`0x3fe0.0000.0000.0000))) +++ test-sin-1 (expect-eq !>(`@`0x3fea.ed54.8f09.0cee) !>((sd `@rd`0x3ff0.0000.0000.0000))) +++ test-sin-n1 (expect-eq !>(`@`0xbfea.ed54.8f09.0cee) !>((sd `@rd`0xbff0.0000.0000.0000))) +++ test-sin-pio2 (expect-eq !>(`@`0x3ff0.0000.0000.0000) !>((sd `@rd`0x3ff9.21fb.5444.2d18))) +++ test-sin-10 (expect-eq !>(`@`0xbfe1.689e.f5f3.4f52) !>((sd `@rd`0x4024.0000.0000.0000))) +++ test-sin-100 (expect-eq !>(`@`0xbfe0.3425.b78c.4db8) !>((sd `@rd`0x4059.0000.0000.0000))) +++ test-sin-inf (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((sd `@rd`0x7ff0.0000.0000.0000))) +++ test-sin-nan (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((sd `@rd`0x7ff8.0000.0000.0000))) +++ test-sin-n0 (expect-eq !>(`@`0x8000.0000.0000.0000) !>((sd `@rd`0x8000.0000.0000.0000))) +++ test-cos-0 (expect-eq !>(`@`0x3ff0.0000.0000.0000) !>((cd `@rd`0x0))) +++ test-cos-half (expect-eq !>(`@`0x3fec.1528.065b.7d50) !>((cd `@rd`0x3fe0.0000.0000.0000))) +++ test-cos-1 (expect-eq !>(`@`0x3fe1.4a28.0fb5.068c) !>((cd `@rd`0x3ff0.0000.0000.0000))) +++ test-cos-pi (expect-eq !>(`@`0xbff0.0000.0000.0000) !>((cd `@rd`0x4009.21fb.5444.2d18))) +++ test-cos-10 (expect-eq !>(`@`0xbfea.d9ac.890c.6b1f) !>((cd `@rd`0x4024.0000.0000.0000))) +++ test-cos-100 (expect-eq !>(`@`0x3feb.981d.bf66.5fdf) !>((cd `@rd`0x4059.0000.0000.0000))) +++ test-cos-inf (expect-eq !>(`@`0x7ff8.0000.0000.0000) !>((cd `@rd`0xfff0.0000.0000.0000))) +++ test-cos-n0 (expect-eq !>(`@`0x3ff0.0000.0000.0000) !>((cd `@rd`0x8000.0000.0000.0000))) +:: ==== @rs ==== +++ test-sin-s-0 (expect-eq !>(`@`0x0) !>((ss `@rs`0x0))) +++ test-sin-s-half (expect-eq !>(`@`0x3ef5.7744) !>((ss `@rs`0x3f00.0000))) +++ test-sin-s-1 (expect-eq !>(`@`0x3f57.6aa4) !>((ss `@rs`0x3f80.0000))) +++ test-sin-s-n1 (expect-eq !>(`@`0xbf57.6aa4) !>((ss `@rs`0xbf80.0000))) +++ test-sin-s-10 (expect-eq !>(`@`0xbf0b.44f8) !>((ss `@rs`0x4120.0000))) +++ test-sin-s-100 (expect-eq !>(`@`0xbf01.a12e) !>((ss `@rs`0x42c8.0000))) +++ test-sin-s-inf (expect-eq !>(`@`0x7fc0.0000) !>((ss `@rs`0x7f80.0000))) +++ test-sin-s-n0 (expect-eq !>(`@`0x8000.0000) !>((ss `@rs`0x8000.0000))) +++ test-cos-s-0 (expect-eq !>(`@`0x3f80.0000) !>((cs `@rs`0x0))) +++ test-cos-s-half (expect-eq !>(`@`0x3f60.a940) !>((cs `@rs`0x3f00.0000))) +++ test-cos-s-1 (expect-eq !>(`@`0x3f0a.5140) !>((cs `@rs`0x3f80.0000))) +++ test-cos-s-10 (expect-eq !>(`@`0xbf56.cd64) !>((cs `@rs`0x4120.0000))) +++ test-cos-s-100 (expect-eq !>(`@`0x3f5c.c0ee) !>((cs `@rs`0x42c8.0000))) +++ test-cos-s-inf (expect-eq !>(`@`0x7fc0.0000) !>((cs `@rs`0xff80.0000))) +++ test-cos-s-n0 (expect-eq !>(`@`0x3f80.0000) !>((cs `@rs`0x8000.0000))) +-- diff --git a/libmath/tools/cheb_check.py b/libmath/tools/cheb_check.py new file mode 100644 index 0000000..de91deb --- /dev/null +++ b/libmath/tools/cheb_check.py @@ -0,0 +1,862 @@ +#!/usr/bin/env python3 +"""Chebyshev/minimax transcendental harness for the Hoon<->jet bit-exact effort. + +Design (see numerics PR #18): + - The algorithm uses ONLY correctly-rounded f64 primitives (+ - * /, round-to- + int, ldexp). For those, IEEE round-nearest-even is bit-identical in strict + f64 (no FMA / no x87 extended precision), in Berkeley SoftFloat, and in the + Hoon `fl` engine. So this strict-f64 Python reference yields the exact bits + the Hoon @rd implementation and the SoftFloat C jet must reproduce. + - mpmath (arbitrary precision) is the INDEPENDENT truth: it designs the + coefficients and proves the faithful-rounding (<= 1 ULP) bound. It is NOT + the test oracle; the algorithm-of-record (this file) is. + +Run: python3 libmath/tools/cheb_check.py exp +Requires: mpmath, numpy. +""" +import sys, struct, math +import mpmath as mp +import numpy as np +mp.mp.dps = 60 + +def f64(x): # force a Python float (strict IEEE f64) + return struct.unpack('>d', struct.pack('>d', float(x)))[0] +def bits(x): return struct.unpack('>Q', struct.pack('>d', f64(x)))[0] +def hexd(x): return f"0x{bits(x):016x}" +def of_bits(b): return struct.unpack('>d', struct.pack('>Q', b))[0] + +def ulps(approx, true_mpf): # signed ULP error of f64 `approx` vs true value + a = f64(approx) + if a == 0: a = 0.0 + # nextafter-based ULP at a + up = math.nextafter(a, math.inf); ulp = up - a if up != a else abs(a) * 2**-52 + if ulp == 0: ulp = 2**-1074 + return float((mp.mpf(a) - true_mpf) / mp.mpf(ulp)) + +# ---- exp @rd: x = k*ln2 + r, r in [-ln2/2, ln2/2]; exp = 2^k * P(r) ---- +LOG2E = f64(1 / mp.log(2)) +# Cody-Waite split of ln2 (fdlibm): ln2hi exact in the top bits so k*ln2hi is exact +LN2HI = f64('6.93147180369123816490e-01') +LN2LO = f64('1.90821492927058770002e-10') + +def gen_exp_coeffs(deg): + """Near-minimax monomial coeffs for exp on [-ln2/2, ln2/2] via mpmath's + high-precision Chebyshev fit (avoids numpy lstsq ill-conditioning), each + coefficient then rounded to f64.""" + half = mp.log(2) / 2 + cs = mp.chebyfit(lambda r: mp.e ** r, [-half, half], deg + 1) # highest-first + return [f64(c) for c in reversed(cs)] # ascending c0..cN + +def horner(coeffs, r): # ascending coeffs; strict-f64 Horner + acc = f64(coeffs[-1]) + for c in reversed(coeffs[:-1]): + acc = f64(f64(acc * r) + c) + return acc + +INF = math.inf +def exp_f64(x, coeffs): + x = f64(x) + if x != x: return float('nan') # NaN -> NaN + if x == INF: return INF # +inf -> +inf + if x == -INF: return 0.0 # -inf -> 0 + k = int(math.floor(f64(x * LOG2E) + 0.5)) # round-nearest to int + if k >= 1025: return INF # overflow + if k <= -1076: return 0.0 # underflow + r = f64(f64(x - f64(k * LN2HI)) - f64(k * LN2LO)) + p = horner(coeffs, r) + try: return f64(math.ldexp(p, k)) # correctly-rounded scale (incl. subnormal) + except OverflowError: return INF + +def check_exp(): + deg = 11 + coeffs = gen_exp_coeffs(deg) + print(f"# exp @rd: Cody-Waite reduction + degree-{deg} minimax poly") + print(f"LOG2E = {hexd(LOG2E)} LN2HI = {hexd(LN2HI)} LN2LO = {hexd(LN2LO)}") + print("coeffs (ascending, c0..c%d), as f64 hex:" % deg) + for i, c in enumerate(coeffs): + print(f" c{i:<2} = {hexd(c)} ({c!r})") + # faithfulness sweep + worst = 0.0; xw = None + xs = [mp.mpf(t)/1000 for t in range(-20000, 20001, 7)] # x in [-20, 20] + for xm in xs: + x = f64(xm) + got = exp_f64(x, coeffs) + tru = mp.e ** mp.mpf(x) + if tru == 0 or not math.isfinite(got): continue + e = abs(ulps(got, tru)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-20,20]: {worst:.3f} ULP at x={xw}") + # expected values for the Hoon test + print("expected (input -> output) bit patterns:") + for x in [0.0, 0.5, 1.0, -1.0, 2.0, 10.0, -5.0, 0.1]: + print(f" exp({x:+}) -> {hexd(exp_f64(x, coeffs))} in={hexd(x)}") + print("edge cases (in -> out):") + edges = [('+inf', INF), ('-inf', -INF), ('nan', float('nan')), + ('709.5', 709.5), ('710.0', 710.0), ('720.0', 720.0), + ('-744.0', -744.0), ('-745.2', -745.2), ('-750.0', -750.0)] + for name, x in edges: + o = exp_f64(x, coeffs) + ib = "0x7ff0000000000000" if x==INF else "0xfff0000000000000" if x==-INF else \ + "0x7ff8000000000000" if x!=x else hexd(x) + print(f" exp({name:>7}) -> {hexd(o):>18} in={ib}") + +# ============================ @rs (f32) ============================ +def f32(x): return struct.unpack('>f', struct.pack('>f', float(x)))[0] +def bits32(x):return struct.unpack('>I', struct.pack('>f', f32(x)))[0] +def hexs(x): return f"0x{bits32(x):08x}" + +def ulps32(approx, true_mpf): + a = f32(approx) + up = f32(math.nextafter(a, math.inf)); ulp = up - a if up != a else abs(a) * 2**-23 + if ulp == 0: ulp = 2**-149 + return float((mp.mpf(a) - true_mpf) / mp.mpf(ulp)) + +# Cody-Waite split of ln2 for f32 (fdlibm expf): low mantissa bits of HI are 0 +LOG2E_S = f32(1 / mp.log(2)) +LN2HI_S = struct.unpack('>f', struct.pack('>I', 0x3f317200))[0] # 6.9314575195e-1 +LN2LO_S = struct.unpack('>f', struct.pack('>I', 0x35bfbe8e))[0] # 1.4286067653e-6 + +def gen_exp_coeffs_f32(deg): + half = mp.log(2) / 2 + cs = mp.chebyfit(lambda r: mp.e ** r, [-half, half], deg + 1) + return [f32(c) for c in reversed(cs)] + +def horner32(coeffs, r): + acc = f32(coeffs[-1]) + for c in reversed(coeffs[:-1]): + acc = f32(f32(acc * r) + c) + return acc + +def exp_f32(x, coeffs): + x = f32(x) + if x != x: return float('nan') + if x == INF: return INF + if x == -INF: return 0.0 + k = int(math.floor(f32(x * LOG2E_S) + 0.5)) + if k >= 129: return INF # overflow (f32 max exp ~88.7) + if k <= -151: return 0.0 # underflow (smallest subnormal ~2^-149) + r = f32(f32(x - f32(k * LN2HI_S)) - f32(k * LN2LO_S)) + p = horner32(coeffs, r) + try: return f32(math.ldexp(p, k)) + except OverflowError: return INF + +def check_exp_rs(): + deg = 6 + coeffs = gen_exp_coeffs_f32(deg) + print(f"# exp @rs: Cody-Waite reduction + degree-{deg} minimax poly (f32)") + print(f"LOG2E = {hexs(LOG2E_S)} LN2HI = {hexs(LN2HI_S)} LN2LO = {hexs(LN2LO_S)}") + print("coeffs (ascending, c0..c%d), as f32 hex:" % deg) + for i, c in enumerate(coeffs): + print(f" c{i:<2} = {hexs(c)} ({c!r})") + worst = 0.0; xw = None + for t in range(-20000, 20001, 7): + x = f32(mp.mpf(t) / 1000) + got = exp_f32(x, coeffs); tru = mp.e ** mp.mpf(x) + if tru == 0 or not math.isfinite(got): continue + e = abs(ulps32(got, tru)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-20,20]: {worst:.3f} ULP at x={xw}") + print("expected (input -> output) bit patterns:") + for x in [0.0, 0.5, 1.0, -1.0, 2.0, 10.0, -5.0, 0.1]: + print(f" exp({x:+}) -> {hexs(exp_f32(x, coeffs))} in={hexs(x)}") + print("edge cases (in -> out):") + edges = [('+inf', INF), ('-inf', -INF), ('nan', float('nan')), + ('88.0', 88.0), ('89.0', 89.0), ('100.0', 100.0), + ('-103.0', -103.0), ('-104.0', -104.0), ('-110.0', -110.0)] + for name, x in edges: + o = exp_f32(x, coeffs) + ib = "0x7f800000" if x==INF else "0xff800000" if x==-INF else \ + "0x7fc00000" if x!=x else hexs(x) + print(f" exp({name:>7}) -> {hexs(o):>12} in={ib}") + +# ============================ log @rd ============================ +# x = 2^e * m, m in [sqrt(1/2), sqrt(2)); log = e*ln2 + 2s*P(z), +# s = (m-1)/(m+1), z = s*s, P(z) = sum_k z^k/(2k+1) (atanh series, z<=0.0294). +SQRT2 = f64(mp.sqrt(2)) +ONE = f64(1.0) +def gen_log_coeffs(deg): # P2(z) = 1/3 + z/5 + z^2/7 + ... + return [f64(mp.mpf(1) / (2*k + 3)) for k in range(deg + 1)] + +def log_f64(x, coeffs): + x = f64(x) + if x != x: return float('nan') # NaN + if x == INF: return INF # +inf + if x < 0.0 or x == -INF: return float('nan') # x<0 -> NaN + if x == 0.0: return -INF # log(+-0) -> -inf + # normalise subnormals so the bit-extraction reduction sees a normal m + add_e = 0 + if x < 2.2250738585072014e-308: # < smallest normal + x = f64(x * 18014398509481984.0); add_e = -54 # *2^54 + b = bits(x); ef = ((b >> 52) & 0x7ff) - 1023; m = of_bits((b & ((1<<52)-1)) | 0x3ff0000000000000) + if m >= SQRT2: m = f64(m * 0.5); ef += 1 + ef += add_e + # log(1+f) = f - s*(f - R), R = 2z*P2(z): keeps f as the exact leading term + f = f64(m - ONE) + s = f64(f / f64(m + ONE)) + z = f64(s * s) + p2 = horner(coeffs, z) + r = f64(f64(f64(z + z)) * p2) # 2z*P2(z) + l1 = f64(f - f64(s * f64(f - r))) # log(1+f) + e = f64(float(ef)) + return f64(f64(e * LN2HI) + f64(l1 + f64(e * LN2LO))) + +def check_log(): + deg = 9 + coeffs = gen_log_coeffs(deg) + print(f"# log @rd: x=2^e*m reduction + degree-{deg} atanh poly") + print(f"LN2HI = {hexd(LN2HI)} LN2LO = {hexd(LN2LO)} SQRT2 = {hexd(SQRT2)}") + print("coeffs (ascending, c0..c%d), as f64 hex:" % deg) + for i, c in enumerate(coeffs): + print(f" c{i:<2} = {hexd(c)} ({c!r})") + worst = 0.0; xw = None + for t in range(1, 200001, 7): + x = f64(mp.mpf(t) / 1000) + got = log_f64(x, coeffs); tru = mp.log(mp.mpf(x)) + if tru == 0 or not math.isfinite(got): continue + e = abs(ulps(got, tru)) + if e > worst: worst, xw = e, x + print(f"max error over x in (0,200]: {worst:.3f} ULP at x={xw}") + print("expected (input -> output) bit patterns:") + for x in [1.0, 2.0, 0.5, 10.0, 100.0, 0.1, 1.0e-300, 7.389056098930650]: + print(f" log({x}) -> {hexd(log_f64(x, coeffs))} in={hexd(x)}") + print("edge cases (in -> out):") + edges = [('+inf', INF), ('-inf', -INF), ('nan', float('nan')), + ('0.0', 0.0), ('-1.0', -1.0), ('1.0', 1.0)] + for name, x in edges: + o = log_f64(x, coeffs) + ib = "0x7ff0000000000000" if x==INF else "0xfff0000000000000" if x==-INF else \ + "0x7ff8000000000000" if x!=x else hexd(x) + print(f" log({name:>6}) -> {hexd(o):>18} in={ib}") + +# ============================ log @rs ============================ +SQRT2_S = f32(mp.sqrt(2)) +def gen_log_coeffs_f32(deg): + return [f32(mp.mpf(1) / (2*k + 3)) for k in range(deg + 1)] + +def log_f32(x, coeffs): + x = f32(x) + if x != x: return float('nan') + if x == INF: return INF + if x < 0.0 or x == -INF: return float('nan') + if x == 0.0: return -INF + add_e = 0 + if x < 1.1754943508222875e-38: # < smallest normal f32 + x = f32(x * 16777216.0); add_e = -24 # *2^24 + b = bits32(x); ef = ((b >> 23) & 0xff) - 127 + m = struct.unpack('>f', struct.pack('>I', (b & 0x7fffff) | 0x3f800000))[0] + if m >= SQRT2_S: m = f32(m * 0.5); ef += 1 + ef += add_e + f = f32(m - 1.0) + s = f32(f / f32(m + 1.0)) + z = f32(s * s) + p2 = horner32(coeffs, z) + r = f32(f32(z + z) * p2) + l1 = f32(f - f32(s * f32(f - r))) + e = f32(float(ef)) + return f32(f32(e * LN2HI_S) + f32(l1 + f32(e * LN2LO_S))) + +def check_log_rs(): + deg = 4 + coeffs = gen_log_coeffs_f32(deg) + print(f"# log @rs: x=2^e*m reduction + degree-{deg} atanh poly (f32)") + print(f"LN2HI = {hexs(LN2HI_S)} LN2LO = {hexs(LN2LO_S)} SQRT2 = {hexs(SQRT2_S)}") + print("coeffs (ascending, c0..c%d), as f32 hex:" % deg) + for i, c in enumerate(coeffs): + print(f" c{i:<2} = {hexs(c)} ({c!r})") + worst = 0.0; xw = None + for t in range(1, 200001, 7): + x = f32(mp.mpf(t) / 1000) + got = log_f32(x, coeffs); tru = mp.log(mp.mpf(x)) + if tru == 0 or not math.isfinite(got): continue + e = abs(ulps32(got, tru)) + if e > worst: worst, xw = e, x + print(f"max error over x in (0,200]: {worst:.3f} ULP at x={xw}") + print("expected (input -> output) bit patterns:") + for x in [1.0, 2.0, 0.5, 10.0, 100.0, 0.1, 1.0e-40, 7.389056]: + print(f" log({x}) -> {hexs(log_f32(x, coeffs))} in={hexs(x)}") + print("edge cases (in -> out):") + edges = [('+inf', INF), ('-inf', -INF), ('nan', float('nan')), + ('0.0', 0.0), ('-1.0', -1.0), ('1.0', 1.0)] + for name, x in edges: + o = log_f32(x, coeffs) + ib = "0x7f800000" if x==INF else "0xff800000" if x==-INF else \ + "0x7fc00000" if x!=x else hexs(x) + print(f" log({name:>6}) -> {hexs(o):>12} in={ib}") + +# ============================ sin/cos @rd ============================ +# x = q*(pi/2) + r, r in [-pi/4, pi/4]; pick sin/cos kernel by q&3. +PIO2 = mp.pi / 2 +INVPIO2 = f64(2 / mp.pi) +PIO2_1 = of_bits(bits(f64(PIO2)) & ~((1 << 22) - 1)) # pi/2, low 22 mantissa bits 0 +PIO2_1T = f64(PIO2 - mp.mpf(PIO2_1)) # tail +SIN_C = [f64(mp.mpf((-1)**(k+1)) / mp.factorial(2*k+3)) for k in range(8)] # S(z): -1/6,1/120,.. +COS_C = [f64(mp.mpf((-1)**k) / mp.factorial(2*k+4)) for k in range(8)] # C(z): 1/24,-1/720,.. + +def ksin(x, y): # fdlibm __kernel_sin(x,y); x+y = reduced arg + z = f64(x*x) + r = horner(SIN_C[1:], z) # S2 + z*S3 + ... (exact-Taylor, 7 terms) + v = f64(z*x) + return f64(x - f64(f64(f64(z*f64(f64(0.5*y) - f64(v*r))) - y) - f64(v*SIN_C[0]))) +def kcos(x, y): # fdlibm __kernel_cos(x,y) + z = f64(x*x) + rc = horner(COS_C, z) # C1 + z*C2 + ... (z^2*rc = r^4/24 - ...) + hz = f64(0.5*z); w2 = f64(1.0 - hz) + return f64(w2 + f64(f64(f64(1.0 - w2) - hz) + f64(f64(f64(z*z)*rc) - f64(x*y)))) +def reduce_pio2(x): # x = q*pi/2 + (rhi+rlo); Fast2Sum tail + q = int(math.floor(f64(x * INVPIO2) + 0.5)) + t = f64(x - f64(q * PIO2_1)) # exact in the Sterbenz region + w = f64(q * PIO2_1T) + rhi = f64(t - w) + rlo = f64(f64(t - rhi) - w) + return q, rhi, rlo +def sin_f64(x): # compute on |x|, apply odd symmetry + x = f64(x) + if x != x or x == INF or x == -INF: return float('nan') + if x == 0.0: return x # +-0 -> +-0 + neg = bits(x) >> 63; ax = f64(abs(x)) + q, rhi, rlo = reduce_pio2(ax); m = q & 3 + v = [ksin(rhi, rlo), kcos(rhi, rlo), + f64(-ksin(rhi, rlo)), f64(-kcos(rhi, rlo))][m] + return f64(-v) if neg else v +def cos_f64(x): # cos is even + x = f64(x) + if x != x or x == INF or x == -INF: return float('nan') + ax = f64(abs(x)) + q, rhi, rlo = reduce_pio2(ax); m = q & 3 + return [kcos(rhi, rlo), f64(-ksin(rhi, rlo)), + f64(-kcos(rhi, rlo)), ksin(rhi, rlo)][m] + +# ---- tan @rd: fdlibm __kernel_tan over the q*pi/2 reduction ---- +TAN_T = [f64(s) for s in [ + '3.33333333333334091986e-01','1.33333333333201242699e-01', + '5.39682539762260521377e-02','2.18694882948595424599e-02', + '8.86323982359930005737e-03','3.59207910759131235356e-03', + '1.45620945432529025516e-03','5.88041240820264096874e-04', + '2.46463134818469906812e-04','7.81794442939557092300e-05', + '7.14072491382608190305e-05','-1.85586374855275456654e-05', + '2.59073051863633712884e-05']] +PIO4 = f64(mp.pi/4); PIO4LO = of_bits(0x3C81A62633145C07) +TAN_BIG = of_bits(0x3FE5942800000000) # ~0.6744 (fdlibm high-word cut) +def head0(x): return of_bits(bits(x) & 0xffffffff00000000) +def ktan(x, y, iy): + hx_neg = bits(x) >> 63 + big = f64(abs(x)) >= TAN_BIG + if big: + if hx_neg: x = f64(-x); y = f64(-y) + z = f64(PIO4 - x); w0 = f64(PIO4LO - y); x = f64(z + w0); y = 0.0 + z = f64(x*x); w = f64(z*z) + r = horner([TAN_T[1],TAN_T[3],TAN_T[5],TAN_T[7],TAN_T[9],TAN_T[11]], w) + v = f64(z * horner([TAN_T[2],TAN_T[4],TAN_T[6],TAN_T[8],TAN_T[10],TAN_T[12]], w)) + s = f64(z * x) + r = f64(y + f64(z * f64(f64(s * f64(r + v)) + y))) + r = f64(r + f64(TAN_T[0] * s)) + w = f64(x + r) + if big: + fac = 1.0 if not hx_neg else -1.0 + v = float(iy) + return f64(fac * f64(v - f64(2.0 * f64(x - f64(f64(f64(w*w) / f64(w+v)) - r))))) + if iy == 1: return w + z2 = head0(w); v2 = f64(r - f64(z2 - x)); a = f64(-1.0 / w); t2 = head0(a) + s2 = f64(1.0 + f64(t2 * z2)) + return f64(t2 + f64(a * f64(s2 + f64(t2 * v2)))) +def tan_f64(x): + x = f64(x) + if x != x or x == INF or x == -INF: return float('nan') + if x == 0.0: return x + neg = bits(x) >> 63; ax = f64(abs(x)) + q, rhi, rlo = reduce_pio2(ax) + iy = -1 if (q & 1) else 1 + t = ktan(rhi, rlo, iy) + return f64(-t) if neg else t + +def check_tan(): + print("# tan @rd: q*pi/2 reduction + fdlibm __kernel_tan (deg-13 + cot path)") + print("T: " + " ".join(hexd(c) for c in TAN_T)) + print(f"PIO4={hexd(PIO4)} PIO4LO={hexd(PIO4LO)} BIG={hexd(TAN_BIG)}") + wt = wr = 0.0; xw = None + for t in range(-200000, 200001, 7): + x = f64(mp.mpf(t) / 1000) + tr = mp.tan(mp.mpf(x)) + if not math.isfinite(float(tr)) or abs(tr) > 1e15 or abs(tr) < 1e-9: continue + gt = tan_f64(x); gr = f64(sin_f64(x) / cos_f64(x)) # dedicated vs ratio + if math.isfinite(gt): + e = abs(ulps(gt, tr)) + if e > wt: wt, xw = e, x + if math.isfinite(gr): wr = max(wr, abs(ulps(gr, tr))) + print(f"dedicated max {wt:.3f} ULP at x={xw}; sin/cos ratio max {wr:.3f} ULP") + for x in [0.0, 0.5, 1.0, -1.0, 0.7853981633974483, 2.0, 10.0, 100.0]: + print(f" tan({x}) -> {hexd(tan_f64(x))} in={hexd(x)}") + for name, x in [('+inf', INF), ('nan', float('nan')), ('-0', -0.0)]: + o = tan_f64(x) + ib = "0x7ff0000000000000" if x==INF else "0x7ff8000000000000" if x!=x else hexd(x) + print(f" tan({name}) -> {hexd(o)} in={ib}") + +def check_trig(which): + fn = sin_f64 if which == 'sin' else cos_f64 + tru = mp.sin if which == 'sin' else mp.cos + print(f"# {which} @rd: x=q*pi/2+r reduction; kernels deg-7 (sin)/deg-7 (cos)") + print(f"INVPIO2={hexd(INVPIO2)} PIO2_1={hexd(PIO2_1)} PIO2_1T={hexd(PIO2_1T)}") + nm = 'SIN_C' if which=='sin' else 'COS_C' # both kernels are always needed; print both + for label, arr in [('SIN_C', SIN_C), ('COS_C', COS_C)]: + print(f"{label}: " + " ".join(hexd(c) for c in arr)) + worst = 0.0; xw = None + for t in range(-200000, 200001, 7): + x = f64(mp.mpf(t) / 1000) + got = fn(x); tr = tru(mp.mpf(x)) + if not math.isfinite(got) or abs(tr) < 1e-9: continue + e = abs(ulps(got, tr)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-200,200] (|f|>1e-9): {worst:.3f} ULP at x={xw}") + print("expected (input -> output):") + for x in [0.0, 0.5, 1.0, -1.0, 1.5707963267948966, 3.141592653589793, 10.0, 100.0]: + print(f" {which}({x}) -> {hexd(fn(x))} in={hexd(x)}") + print("edges:") + for name, x in [('+inf', INF), ('-inf', -INF), ('nan', float('nan')), ('-0', -0.0)]: + o = fn(x) + ib = "0x7ff0000000000000" if x==INF else "0xfff0000000000000" if x==-INF else \ + "0x7ff8000000000000" if x!=x else hexd(x) + print(f" {which}({name:>5}) -> {hexd(o):>18} in={ib}") + +# ============================ sin/cos @rs ============================ +def of_bits32(b): return struct.unpack('>f', struct.pack('>I', b))[0] +INVPIO2_S = f32(2 / mp.pi) +# 3-part pi/2: P1,P2 each ~10 sig bits (so q*Pi exact for q<2^14), P3 the tail. +PIO2_1_S = of_bits32(bits32(f32(PIO2)) & ~0x1fff) +PIO2_2_S = of_bits32(bits32(f32(mp.mpf(PIO2) - mp.mpf(PIO2_1_S))) & ~0x1fff) +PIO2_3_S = f32(mp.mpf(PIO2) - mp.mpf(PIO2_1_S) - mp.mpf(PIO2_2_S)) +SIN_C_S = [f32(mp.mpf((-1)**(k+1)) / mp.factorial(2*k+3)) for k in range(5)] +COS_C_S = [f32(mp.mpf((-1)**k) / mp.factorial(2*k+4)) for k in range(5)] + +def ksin32(x, y): + z = f32(x*x); r = horner32(SIN_C_S[1:], z); v = f32(z*x) + return f32(x - f32(f32(f32(z*f32(f32(0.5*y) - f32(v*r))) - y) - f32(v*SIN_C_S[0]))) +def kcos32(x, y): + z = f32(x*x); rc = horner32(COS_C_S, z) + hz = f32(0.5*z); w2 = f32(1.0 - hz) + return f32(w2 + f32(f32(f32(1.0 - w2) - hz) + f32(f32(f32(z*z)*rc) - f32(x*y)))) +def reduce_pio2_32(x): + q = int(math.floor(f32(x * INVPIO2_S) + 0.5)) + r = f32(x - f32(q * PIO2_1_S)); r = f32(r - f32(q * PIO2_2_S)) + w = f32(q * PIO2_3_S); rhi = f32(r - w); rlo = f32(f32(r - rhi) - w) + return q, rhi, rlo +def sin_f32(x): + x = f32(x) + if x != x or x == INF or x == -INF: return float('nan') + if x == 0.0: return x + neg = bits32(x) >> 31; ax = f32(abs(x)) + q, rhi, rlo = reduce_pio2_32(ax); m = q & 3 + v = [ksin32(rhi,rlo), kcos32(rhi,rlo), f32(-ksin32(rhi,rlo)), f32(-kcos32(rhi,rlo))][m] + return f32(-v) if neg else v +def cos_f32(x): + x = f32(x) + if x != x or x == INF or x == -INF: return float('nan') + ax = f32(abs(x)) + q, rhi, rlo = reduce_pio2_32(ax); m = q & 3 + return [kcos32(rhi,rlo), f32(-ksin32(rhi,rlo)), f32(-kcos32(rhi,rlo)), ksin32(rhi,rlo)][m] + +def check_trig_rs(which): + fn = sin_f32 if which == 'sin' else cos_f32 + tru = mp.sin if which == 'sin' else mp.cos + print(f"# {which} @rs: x=q*pi/2+r reduction (f32)") + print(f"INVPIO2={hexs(INVPIO2_S)} PIO2_1={hexs(PIO2_1_S)} PIO2_2={hexs(PIO2_2_S)} PIO2_3={hexs(PIO2_3_S)}") + print("SIN_C: " + " ".join(hexs(c) for c in SIN_C_S)) + print("COS_C: " + " ".join(hexs(c) for c in COS_C_S)) + worst = 0.0; xw = None + for t in range(-200000, 200001, 7): + x = f32(mp.mpf(t) / 1000); got = fn(x); tr = tru(mp.mpf(x)) + if not math.isfinite(got) or abs(tr) < 1e-6: continue + e = abs(ulps32(got, tr)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-200,200] (|f|>1e-6): {worst:.3f} ULP at x={xw}") + print("expected (input -> output):") + for x in [0.0, 0.5, 1.0, -1.0, 1.5707964, 3.1415927, 10.0, 100.0]: + print(f" {which}({x}) -> {hexs(fn(x))} in={hexs(x)}") + for name, x in [('+inf', INF), ('-inf', -INF), ('nan', float('nan')), ('-0', -0.0)]: + o = fn(x) + ib = "0x7f800000" if x==INF else "0xff800000" if x==-INF else \ + "0x7fc00000" if x!=x else hexs(x) + print(f" {which}({name:>5}) -> {hexs(o):>12} in={ib}") + +# ============================ atan @rd ============================ +# fdlibm s_atan: reduce |x| against breakpoints 7/16,11/16,19/16,39/16 to a +# small argument near atan(0.5)/atan(1)/atan(1.5)/atan(inf), minimax poly. +ATAN_AT = [f64(s) for s in [ + '3.33333333333329318027e-01','-1.99999999998764832476e-01', + '1.42857142725034663711e-01','-1.11111104054623557880e-01', + '9.09088713343650656196e-02','-7.69187620504482999495e-02', + '6.66107313738753120669e-02','-5.83357013379057348645e-02', + '4.97687799461593236017e-02','-3.65315727442169155270e-02', + '1.62858201153657823623e-02']] +ATAN_BP = [f64(7)/16, f64(11)/16, f64(19)/16, f64(39)/16] +ATANHI = [f64(mp.atan(mp.mpf('0.5'))), f64(mp.pi/4), f64(mp.atan(mp.mpf('1.5'))), f64(mp.pi/2)] +ATANLO = [f64(mp.atan(mp.mpf('0.5')) - mp.mpf(ATANHI[0])), f64(mp.pi/4 - mp.mpf(ATANHI[1])), + f64(mp.atan(mp.mpf('1.5')) - mp.mpf(ATANHI[2])), f64(mp.pi/2 - mp.mpf(ATANHI[3]))] + +def atan_kernel(x): # x >= 0 finite + if x < ATAN_BP[0]: idd = -1; xr = x + elif x < ATAN_BP[1]: idd = 0; xr = f64(f64(f64(x+x)-ONE) / f64(2.0+x)) + elif x < ATAN_BP[2]: idd = 1; xr = f64(f64(x-ONE) / f64(x+ONE)) + elif x < ATAN_BP[3]: idd = 2; xr = f64(f64(x-1.5) / f64(ONE+f64(1.5*x))) + else: idd = 3; xr = f64(-1.0 / x) + z = f64(xr*xr) + s = f64(z * horner(ATAN_AT, z)) # (s1+s2) + if idd < 0: return f64(xr - f64(xr*s)) + return f64(ATANHI[idd] - f64(f64(f64(xr*s) - ATANLO[idd]) - xr)) +def atan_f64(x): + x = f64(x) + if x != x: return float('nan') + if x == INF: return ATANHI[3] + if x == -INF: return f64(-ATANHI[3]) + if x == 0.0: return x + neg = bits(x) >> 63; ax = f64(abs(x)) + r = atan_kernel(ax) + return f64(-r) if neg else r + +def check_atan(): + print("# atan @rd: fdlibm breakpoint reduction + degree-10 minimax poly") + print("AT: " + " ".join(hexd(c) for c in ATAN_AT)) + print("BP: " + " ".join(hexd(c) for c in ATAN_BP)) + print("ATANHI: " + " ".join(hexd(c) for c in ATANHI)) + print("ATANLO: " + " ".join(hexd(c) for c in ATANLO)) + worst = 0.0; xw = None + for t in range(-500000, 500001, 7): + x = f64(mp.mpf(t) / 1000); got = atan_f64(x); tr = mp.atan(mp.mpf(x)) + if abs(tr) < 1e-12 or not math.isfinite(got): continue + e = abs(ulps(got, tr)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-500,500]: {worst:.3f} ULP at x={xw}") + print("expected:") + for x in [0.0, 0.5, 1.0, -1.0, 1.5, 2.0, 10.0, 0.1, -0.7]: + print(f" atan({x}) -> {hexd(atan_f64(x))} in={hexd(x)}") + for name, x in [('+inf', INF), ('-inf', -INF), ('nan', float('nan')), ('-0', -0.0)]: + o = atan_f64(x) + ib = "0x7ff0000000000000" if x==INF else "0xfff0000000000000" if x==-INF else \ + "0x7ff8000000000000" if x!=x else hexd(x) + print(f" atan({name:>5}) -> {hexd(o):>18} in={ib}") + +# ============================ atan @rs ============================ +ATAN_AT_S = [f32(s) for s in ['3.3333328366e-01','-1.9999158382e-01', + '1.4253635705e-01','-1.0648017377e-01','6.1687607318e-02']] +ATAN_BP_S = [f32(7)/16, f32(11)/16, f32(19)/16, f32(39)/16] +ATANHI_S = [f32(mp.atan(mp.mpf('0.5'))), f32(mp.pi/4), f32(mp.atan(mp.mpf('1.5'))), f32(mp.pi/2)] +ATANLO_S = [f32(mp.atan(mp.mpf('0.5')) - mp.mpf(ATANHI_S[0])), f32(mp.pi/4 - mp.mpf(ATANHI_S[1])), + f32(mp.atan(mp.mpf('1.5')) - mp.mpf(ATANHI_S[2])), f32(mp.pi/2 - mp.mpf(ATANHI_S[3]))] +def atan_kernel32(x): + if x < ATAN_BP_S[0]: idd = -1; xr = x + elif x < ATAN_BP_S[1]: idd = 0; xr = f32(f32(f32(x+x)-1.0) / f32(2.0+x)) + elif x < ATAN_BP_S[2]: idd = 1; xr = f32(f32(x-1.0) / f32(x+1.0)) + elif x < ATAN_BP_S[3]: idd = 2; xr = f32(f32(x-1.5) / f32(1.0+f32(1.5*x))) + else: idd = 3; xr = f32(-1.0 / x) + z = f32(xr*xr) + s = f32(z * horner32(ATAN_AT_S, z)) + if idd < 0: return f32(xr - f32(xr*s)) + return f32(ATANHI_S[idd] - f32(f32(f32(xr*s) - ATANLO_S[idd]) - xr)) +def atan_f32(x): + x = f32(x) + if x != x: return float('nan') + if x == INF: return ATANHI_S[3] + if x == -INF: return f32(-ATANHI_S[3]) + if x == 0.0: return x + neg = bits32(x) >> 31; ax = f32(abs(x)) + r = atan_kernel32(ax) + return f32(-r) if neg else r + +def check_atan_rs(): + print("# atan @rs: fdlibm breakpoint reduction + degree-4 minimax poly (f32)") + print("AT: " + " ".join(hexs(c) for c in ATAN_AT_S)) + print("BP: " + " ".join(hexs(c) for c in ATAN_BP_S)) + print("ATANHI: " + " ".join(hexs(c) for c in ATANHI_S)) + print("ATANLO: " + " ".join(hexs(c) for c in ATANLO_S)) + worst = 0.0; xw = None + for t in range(-500000, 500001, 7): + x = f32(mp.mpf(t) / 1000); got = atan_f32(x); tr = mp.atan(mp.mpf(x)) + if abs(tr) < 1e-7 or not math.isfinite(got): continue + e = abs(ulps32(got, tr)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-500,500]: {worst:.3f} ULP at x={xw}") + print("expected:") + for x in [0.0, 0.5, 1.0, -1.0, 1.5, 2.0, 10.0, 0.1, -0.7]: + print(f" atan({x}) -> {hexs(atan_f32(x))} in={hexs(x)}") + for name, x in [('+inf', INF), ('-inf', -INF), ('nan', float('nan')), ('-0', -0.0)]: + o = atan_f32(x) + ib = "0x7f800000" if x==INF else "0xff800000" if x==-INF else \ + "0x7fc00000" if x!=x else hexs(x) + print(f" atan({name:>5}) -> {hexs(o):>12} in={ib}") + +# ============================ asin @rd ============================ +# fdlibm e_asin: |x|<0.5 rational x+x*R(x^2); |x| in [0.5,1) via t=(1-|x|)/2, +# s=sqrt(t), asin = pi/2 - 2*(s + s*R(t)). R = P/Q (pS/qS coeffs). +PS = [f64(s) for s in ['1.66666666666666657415e-01','-3.25565818622400915405e-01', + '2.01212532134862925881e-01','-4.00555345006794114027e-02', + '7.91534994289814532176e-04','3.47933107596021167570e-05']] +QS = [f64(s) for s in ['-2.40339491173441421878e+00','2.02094576023350569471e+00', + '-6.88283971605453293030e-01','7.70381505559019352791e-02']] +PIO2_H = f64(mp.pi/2); PIO2_L = f64(mp.pi/2 - mp.mpf(PIO2_H)); PIO4_H = f64(mp.pi/4) +ASIN_THRESH = of_bits(0x3fef333300000000) # ~0.975 (fdlibm high-word cut) + +def asin_R(t): # P(t)/Q(t) + p = f64(t * horner(PS, t)) + q = f64(ONE + f64(t * horner(QS, t))) + return f64(p / q) +def asin_f64(x): + x = f64(x) + if x != x: return float('nan') + ax = f64(abs(x)); sgn = bits(x) >> 63 + if ax > ONE: return float('nan') + if ax == ONE: + return f64(f64(x * PIO2_H) + f64(x * PIO2_L)) # +-pi/2 with sign of x + if ax < 0.5: + if ax < 1.49e-8: return x # |x|<2^-26: asin(x)=x + t = f64(x * x) + return f64(x + f64(x * asin_R(t))) + w = f64(ONE - ax); t = f64(w * 0.5) + r = asin_R(t); s = f64(math.sqrt(t)) + if ax >= ASIN_THRESH: # near 1: simple form + res = f64(PIO2_H - f64(f64(2.0 * f64(s + f64(s * r))) - PIO2_L)) + else: # head/tail recovers low bits of s + df = of_bits(bits(s) & 0xffffffff00000000) + c = f64(f64(t - f64(df * df)) / f64(s + df)) + p2 = f64(f64(2.0 * f64(s * r)) - f64(PIO2_L - f64(2.0 * c))) + q2 = f64(PIO4_H - f64(2.0 * df)) + res = f64(PIO4_H - f64(p2 - q2)) + return f64(-res) if sgn else res + +PI_H = f64(mp.pi) +def acos_f64(x): # fdlibm e_acos + x = f64(x) + if x != x: return float('nan') + ax = f64(abs(x)); neg = bits(x) >> 63 + if ax > ONE: return float('nan') + if ax == ONE: + return 0.0 if not neg else f64(PI_H + f64(2.0 * PIO2_L)) # acos(1)=0, acos(-1)=pi + if ax < 0.5: + if ax < 6.94e-18: return PIO2_H # |x|<2^-57 + z = f64(x * x); r = asin_R(z) + return f64(PIO2_H - f64(x - f64(PIO2_L - f64(x * r)))) + if neg: # x <= -0.5 + z = f64(f64(ONE + x) * 0.5); s = f64(math.sqrt(z)); r = asin_R(z) + w = f64(f64(r * s) - PIO2_L) + return f64(PI_H - f64(2.0 * f64(s + w))) + z = f64(f64(ONE - x) * 0.5); s = f64(math.sqrt(z)) # x >= 0.5 + df = of_bits(bits(s) & 0xffffffff00000000) + c = f64(f64(z - f64(df * df)) / f64(s + df)) + r = asin_R(z); w = f64(f64(r * s) + c) + return f64(2.0 * f64(df + w)) + +def check_acos(): + print("# acos @rd: fdlibm rational kernel (shares asin P/Q)") + worst = 0.0; xw = None + for t in range(-1000000, 1000001, 3): + x = f64(mp.mpf(t) / 1000000) + got = acos_f64(x); tr = mp.acos(mp.mpf(x)) + if abs(tr) < 1e-12 or not math.isfinite(got): continue + e = abs(ulps(got, tr)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-1,1]: {worst:.3f} ULP at x={xw}") + for x in [0.0, 0.5, 1.0, -1.0, 0.25, 0.75, 0.9, -0.6, -0.9, 0.1]: + print(f" acos({x}) -> {hexd(acos_f64(x))} in={hexd(x)}") + for name, x in [('nan', float('nan')), ('1.5', 1.5), ('-2', -2.0)]: + print(f" acos({name}) -> {hexd(acos_f64(x))}") + +def check_asin(): + print("# asin @rd: fdlibm rational kernel") + print("PS: " + " ".join(hexd(c) for c in PS)) + print("QS: " + " ".join(hexd(c) for c in QS)) + print(f"PIO2_H={hexd(PIO2_H)} PIO2_L={hexd(PIO2_L)}") + worst = 0.0; xw = None + for t in range(-1000000, 1000001, 3): + x = f64(mp.mpf(t) / 1000000) + got = asin_f64(x); tr = mp.asin(mp.mpf(x)) + if abs(tr) < 1e-12 or not math.isfinite(got): continue + e = abs(ulps(got, tr)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-1,1]: {worst:.3f} ULP at x={xw}") + for x in [0.0, 0.5, 1.0, -1.0, 0.25, 0.75, 0.9, 0.99, -0.6, 0.1]: + print(f" asin({x}) -> {hexd(asin_f64(x))} in={hexd(x)}") + for name, x in [('nan', float('nan')), ('-0', -0.0), ('1.5', 1.5), ('-2', -2.0)]: + print(f" asin({name}) -> {hexd(asin_f64(x))}") + +# ============================ asin/acos @rs ============================ +PS_S = [f32(s) for s in ['1.6666586697e-01','-4.2743422091e-02','-8.6563630030e-03']] +QS1_S = f32('-7.0662963390e-01') +PIO2_HS = f32(mp.pi/2); PIO2_LS = f32(mp.pi/2 - mp.mpf(PIO2_HS)); PI_HS = f32(mp.pi) +PIO4_HS = f32(mp.pi/4) +def asin_R32(t): + p = f32(t * horner32(PS_S, t)) + q = f32(1.0 + f32(t * QS1_S)) + return f32(p / q) +def asin_f32(x): + x = f32(x) + if x != x: return float('nan') + ax = f32(abs(x)); sgn = bits32(x) >> 31 + if ax > 1.0: return float('nan') + if ax == 1.0: return f32(f32(x * PIO2_HS) + f32(x * PIO2_LS)) + if ax < 0.5: + if ax < 2.44e-4: return x + return f32(x + f32(x * asin_R32(f32(x * x)))) + w = f32(1.0 - ax); t = f32(w * 0.5) + s = f32(math.sqrt(t)); r = asin_R32(t) + if ax >= 0.975: + res = f32(PIO2_HS - f32(2.0 * f32(s + f32(s * r)))) + else: + df = of_bits32(bits32(s) & 0xfffff000) + c = f32(f32(t - f32(df * df)) / f32(s + df)) + p2 = f32(f32(2.0 * f32(s * r)) - f32(PIO2_LS - f32(2.0 * c))) + q2 = f32(PIO4_HS - f32(2.0 * df)) + res = f32(PIO4_HS - f32(p2 - q2)) + return f32(-res) if sgn else res +def acos_f32(x): + x = f32(x) + if x != x: return float('nan') + ax = f32(abs(x)); neg = bits32(x) >> 31 + if ax > 1.0: return float('nan') + if ax == 1.0: return 0.0 if not neg else f32(PI_HS + f32(2.0 * PIO2_LS)) + if ax < 0.5: + if ax < 1.49e-8: return PIO2_HS # |x| < 2^-26 + z = f32(x * x); r = asin_R32(z) + return f32(PIO2_HS - f32(x - f32(PIO2_LS - f32(x * r)))) + if neg: + z = f32(f32(1.0 + x) * 0.5); s = f32(math.sqrt(z)); r = asin_R32(z) + w = f32(f32(r * s) - PIO2_LS) + return f32(PI_HS - f32(2.0 * f32(s + w))) + z = f32(f32(1.0 - x) * 0.5); s = f32(math.sqrt(z)); r = asin_R32(z) + w = f32(f32(r * s) + 0.0) # f32: no head/tail split + # use simple form: acos = 2*asin(sqrt((1-x)/2)) = 2*(s + s*r) + return f32(2.0 * f32(s + f32(s * r))) + +def check_ainv_rs(which): + fn = asin_f32 if which == 'asin' else acos_f32 + tru = mp.asin if which == 'asin' else mp.acos + print(f"# {which} @rs: fdlibm rational kernel (f32)") + if which == 'asin': + print("PS: " + " ".join(hexs(c) for c in PS_S) + " QS1: " + hexs(QS1_S)) + print(f"PIO2_H={hexs(PIO2_HS)} PIO2_L={hexs(PIO2_LS)} PI_H={hexs(PI_HS)}") + worst = 0.0; xw = None + for t in range(-1000000, 1000001, 7): + x = f32(mp.mpf(t) / 1000000); got = fn(x); tr = tru(mp.mpf(x)) + if abs(tr) < 1e-7 or not math.isfinite(got): continue + e = abs(ulps32(got, tr)) + if e > worst: worst, xw = e, x + print(f"max error over x in [-1,1]: {worst:.3f} ULP at x={xw}") + for x in [0.0, 0.5, 1.0, -1.0, 0.25, 0.75, 0.9, -0.6, 0.1]: + print(f" {which}({x}) -> {hexs(fn(x))} in={hexs(x)}") + for name, x in [('nan', float('nan')), ('1.5', 1.5)]: + print(f" {which}({name}) -> {hexs(fn(x))}") + +# ---- tan @rs: 3-part pi/2 reduction + f32 __kernel_tan ---- +TAN_T_S = [f32(c) for c in TAN_T[:7]] +PIO4_S = f32(mp.pi/4); PIO4LO_S = f32(mp.pi/4 - mp.mpf(PIO4_S)) +TAN_BIG_S = f32(0.6744) +def head0_32(x): return of_bits32(bits32(x) & 0xfffff000) +def ktan32(x, y, iy): + hx_neg = bits32(x) >> 31 + big = f32(abs(x)) >= TAN_BIG_S + if big: + if hx_neg: x = f32(-x); y = f32(-y) + z = f32(PIO4_S - x); w0 = f32(PIO4LO_S - y); x = f32(z + w0); y = 0.0 + z = f32(x*x); w = f32(z*z) + r = horner32([TAN_T_S[1],TAN_T_S[3],TAN_T_S[5]], w) + v = f32(z * horner32([TAN_T_S[2],TAN_T_S[4],TAN_T_S[6]], w)) + s = f32(z * x) + r = f32(y + f32(z * f32(f32(s * f32(r + v)) + y))) + r = f32(r + f32(TAN_T_S[0] * s)) + w = f32(x + r) + if big: + fac = 1.0 if not hx_neg else -1.0 + v = float(iy) + return f32(fac * f32(v - f32(2.0 * f32(x - f32(f32(f32(w*w) / f32(w+v)) - r))))) + if iy == 1: return w + z2 = head0_32(w); v2 = f32(r - f32(z2 - x)); a = f32(-1.0 / w); t2 = head0_32(a) + s2 = f32(1.0 + f32(t2 * z2)) + return f32(t2 + f32(a * f32(s2 + f32(t2 * v2)))) +def tan_f32(x): + x = f32(x) + if x != x or x == INF or x == -INF: return float('nan') + if x == 0.0: return x + neg = bits32(x) >> 31; ax = f32(abs(x)) + q, rhi, rlo = reduce_pio2_32(ax) + iy = -1 if (q & 1) else 1 + t = ktan32(rhi, rlo, iy) + return f32(-t) if neg else t + +def check_tan_rs(): + print("# tan @rs: 3-part pi/2 reduction + f32 __kernel_tan") + print("T: " + " ".join(hexs(c) for c in TAN_T_S)) + print(f"PIO4={hexs(PIO4_S)} PIO4LO={hexs(PIO4LO_S)} BIG={hexs(TAN_BIG_S)}") + wt = 0.0; xw = None + for t in range(-200000, 200001, 7): + x = f32(mp.mpf(t) / 1000); tr = mp.tan(mp.mpf(x)) + if not math.isfinite(float(tr)) or abs(tr) > 1e7 or abs(tr) < 1e-6: continue + gt = tan_f32(x) + if math.isfinite(gt): + e = abs(ulps32(gt, tr)) + if e > wt: wt, xw = e, x + print(f"dedicated max {wt:.3f} ULP at x={xw}") + for x in [0.0, 0.5, 1.0, -1.0, 0.7853982, 2.0, 10.0, 100.0]: + print(f" tan({x}) -> {hexs(tan_f32(x))} in={hexs(x)}") + for name, x in [('+inf', INF), ('nan', float('nan')), ('-0', -0.0)]: + o = tan_f32(x) + ib = "0x7f800000" if x==INF else "0x7fc00000" if x!=x else hexs(x) + print(f" tan({name}) -> {hexs(o)} in={ib}") + + + +# ============================ @rq (f128) ============================ +# numpy float128 is x87 80-bit ext, NOT true IEEE quad, so we model f128 with +# mpmath rounded to 113-bit RNE per op (= SoftFloat f128 for + - * / ldexp). +# exp @rq vertical slice; the rest of the rq surface follows the @rd pattern +# at higher degree. Run: python3 cheb_check.py exp-rq +QP = 113 +def qr(x): # round to f128 (113-bit RNE) + x = mp.mpf(x) + if x == 0: return mp.mpf(0) + m, e = mp.frexp(x); M = mp.nint(m * mp.mpf(2)**QP) + return M * mp.mpf(2)**(e - QP) +def qhex(v): # 128-bit IEEE-quad encoding + if v == 0: return "0x" + "0"*32 + s = 1 if v < 0 else 0; a = abs(mp.mpf(v)) + m, e = mp.frexp(a); biased = (e - 1) + 16383 + mant = int(mp.nint((m*2 - 1) * mp.mpf(2)**112)) + if mant == (1 << 112): mant = 0; biased += 1 + return f"0x{(s<<127)|(biased<<112)|mant:032x}" +def qadd(a, b): return qr(mp.mpf(a) + mp.mpf(b)) +def qmul(a, b): return qr(mp.mpf(a) * mp.mpf(b)) +def qsub(a, b): return qr(mp.mpf(a) - mp.mpf(b)) +def qhorner(co, r): + acc = co[-1] + for c in reversed(co[:-1]): acc = qadd(qmul(acc, r), c) + return acc +def gen_exp_coeffs_q(deg): + half = mp.log(2) / 2 + return [qr(c) for c in reversed(mp.chebyfit(lambda r: mp.e**r, [-half, half], deg+1))] +QLOG2E = qr(1 / mp.log(2)) +_ln2 = mp.log(2) +QLN2HI = qr(mp.mpf(int(qr(_ln2) * mp.mpf(2)**80)) / mp.mpf(2)**80) +QLN2LO = qr(_ln2 - QLN2HI) +def exp_q(x, co): + x = qr(x); k = int(mp.floor(qr(x * QLOG2E) + 0.5)) + r = qsub(qsub(x, qmul(mp.mpf(k), QLN2HI)), qmul(mp.mpf(k), QLN2LO)) + return qr(qhorner(co, r) * mp.mpf(2)**k) +def check_exp_rq(): + deg = 24; co = gen_exp_coeffs_q(deg) + print(f"# exp @rq: Cody-Waite + degree-{deg} minimax poly (f128, 113-bit)") + print(f"LOG2E={qhex(QLOG2E)}\nLN2HI={qhex(QLN2HI)}\nLN2LO={qhex(QLN2LO)}") + print("coeffs c0..c%d:" % deg) + for i, c in enumerate(co): print(f" c{i:<2}={qhex(c)}") + worst = 0 + for t in range(-2000, 2001): + x = qr(mp.mpf(t)/100); g = exp_q(x, co); tr = mp.e**x + ulp = mp.mpf(2)**(mp.frexp(g)[1] - QP); worst = max(worst, abs((g-tr)/ulp)) + print(f"max error over [-20,20]: {float(worst):.3f} ULP") + for v in ['1','0.5','-2','10']: + print(f" exp({v}) -> {qhex(exp_q(mp.mpf(v), co))}") + +if __name__ == '__main__': + fn = sys.argv[1] if len(sys.argv) > 1 else 'exp' + {'exp': check_exp, 'exp-rs': check_exp_rs, + 'log': check_log, 'log-rs': check_log_rs, + 'sin': lambda: check_trig('sin'), 'cos': lambda: check_trig('cos'), + 'sin-rs': lambda: check_trig_rs('sin'), 'cos-rs': lambda: check_trig_rs('cos'), + 'atan': check_atan, 'atan-rs': check_atan_rs, 'asin': check_asin, 'acos': check_acos, + 'asin-rs': lambda: check_ainv_rs('asin'), 'acos-rs': lambda: check_ainv_rs('acos'), + 'tan': check_tan, 'tan-rs': check_tan_rs, 'exp-rq': check_exp_rq}[fn]() diff --git a/libmath/tools/rq_check.c b/libmath/tools/rq_check.c new file mode 100644 index 0000000..ea30301 --- /dev/null +++ b/libmath/tools/rq_check.c @@ -0,0 +1,107 @@ +// rq_check.c -- SoftFloat-f128 reference for @rq transcendentals (PR #18). +// SoftFloat = the jet arithmetic (this code IS the jet body, modulo the u3 ABI); +// MPFR = correctly-rounded truth. Coeffs from mpmath @113-bit. +// Build: re-archive libsoftfloat.a with `libtool -static` (zig .a not 8-byte +// aligned for Apple ld), then cc -I -I/opt/homebrew/include +// rq_check.c /tmp/libsoftfloat.a -lmpfr -lgmp. +#include +#include +#include +#include "softfloat.h" +#include +typedef float128_t q; +static q qadd(q a,q b){q r; f128M_add(&a,&b,&r); return r;} +static q qsub(q a,q b){q r; f128M_sub(&a,&b,&r); return r;} +static q qmul(q a,q b){q r; f128M_mul(&a,&b,&r); return r;} +static q qdiv(q a,q b){q r; f128M_div(&a,&b,&r); return r;} +static q qsqt(q a){q r; f128M_sqrt(&a,&r); return r;} +static q qi(int64_t n){q r; i64_to_f128M(n,&r); return r;} +static q q2k(int k){q r; r.v[0]=0; r.v[1]=((uint64_t)(k+16383))<<48; return r;} +static q negq(q a){a.v[1]^=0x8000000000000000ULL; return a;} +static void qprint(const char*nm,q a){printf("%s0x%016llx%016llx\n",nm,(unsigned long long)a.v[1],(unsigned long long)a.v[0]);} +static void q_to_mpfr(mpfr_t o,q a){uint64_t hi=a.v[1],lo=a.v[0]; int s=hi>>63; int e=(hi>>48)&0x7fff; + uint64_t mh=hi&0xffffffffffffULL; mpfr_set_ui(o,mh,MPFR_RNDN); mpfr_mul_2ui(o,o,64,MPFR_RNDN); + mpfr_t t; mpfr_init2(t,128); mpfr_set_ui(t,lo,MPFR_RNDN); mpfr_add(o,o,t,MPFR_RNDN); mpfr_clear(t); + mpfr_t one; mpfr_init2(one,128); mpfr_set_ui(one,1,MPFR_RNDN); mpfr_mul_2ui(one,one,112,MPFR_RNDN); + mpfr_add(o,o,one,MPFR_RNDN); mpfr_clear(one); mpfr_mul_2si(o,o,e-16383-112,MPFR_RNDN); if(s)mpfr_neg(o,o,MPFR_RNDN);} +static double ulp_err(q g,mpfr_t tr){ + mpfr_t gv,d,u; mpfr_inits2(300,gv,d,u,(mpfr_ptr)0); q_to_mpfr(gv,g); + mpfr_sub(d,gv,tr,MPFR_RNDN); long e2; mpfr_get_d_2exp(&e2,gv,MPFR_RNDN); + mpfr_set_ui(u,1,MPFR_RNDN); mpfr_mul_2si(u,u,(int)e2-1-112,MPFR_RNDN); mpfr_div(d,d,u,MPFR_RNDN); + double r=mpfr_get_d(d,MPFR_RNDN); if(r<0)r=-r; mpfr_clears(gv,d,u,(mpfr_ptr)0); return r;} +static const q LOG2E={{0xe1777d0ffda0d23aull,0x3fff71547652b82full}},LN2HI={{0xf35793c800000000ull,0x3ffe62e42fefa39eull}},LN2LO={{0xfc32f366359d274aull,0xbfad319ff0342542ull}},HALF={{0x0000000000000000ull,0x3ffe000000000000ull}},ONE={{0x0000000000000000ull,0x3fff000000000000ull}},SQRT2={{0xc908b2fb1366ea95ull,0x3fff6a09e667f3bcull}}; +static const q INVPIO2={{0x2a53f84eafa3ea6aull,0x3ffe45f306dc9c88ull}},PIO2_1={{0x8460000000000000ull,0x3fff921fb54442d1ull}},PIO2_1T={{0x07344a409382229aull,0x3fc2313198a2e037ull}}; +static const q EXC[25]={ + {{0x0000000000000000ull,0x3fff000000000000ull}}, {{0x0000000000000000ull,0x3fff000000000000ull}}, {{0x0000000000000000ull,0x3ffe000000000000ull}}, {{0x5555555555555555ull,0x3ffc555555555555ull}}, {{0x5555555555555555ull,0x3ffa555555555555ull}}, {{0x1111111111111111ull,0x3ff8111111111111ull}}, {{0x6c16c16c16c16c17ull,0x3ff56c16c16c16c1ull}}, {{0xa01a01a01a01a3e8ull,0x3ff2a01a01a01a01ull}}, {{0xa01a01a01a01a146ull,0x3fefa01a01a01a01ull}}, {{0x38faac1c88a5a526ull,0x3fec71de3a556c73ull}}, {{0xc72ef016d3d6e867ull,0x3fe927e4fb7789f5ull}}, {{0x38fe748363c46e8bull,0x3fe5ae64567f544eull}}, {{0x7b544dab18f475c5ull,0x3fe21eed8eff8d89ull}}, {{0x97c9f3aebabb2423ull,0x3fde6124613a86d0ull}}, {{0xd20b83c7f94d17d8ull,0x3fda93974a8c07c9ull}}, {{0xf5f4284f0d74f9e7ull,0x3fd6ae7f3e733b81ull}}, {{0xf417b4d27c5f92a9ull,0x3fd2ae7f3e733b81ull}}, {{0x6a419e674779c97cull,0x3fce952c77030a99ull}}, {{0x0466ff8c8b42b3dfull,0x3fca6827863b97b5ull}}, {{0x874b7a686d819241ull,0x3fc62f49b469f892ull}}, {{0xbb3b32a11bb5f139ull,0x3fc1e542ba427463ull}}, {{0xc93890ff9ab55cbbull,0x3fbd71b8db9f7f73ull}}, {{0x6efc0717eae785a1ull,0x3fb90ce38aab7bd7ull}}, {{0xcb3f4f7edfaa2666ull,0x3fb47693274bab2aull}}, {{0x61cb0e23655d47cbull,0x3faff3629154e0a7ull}} +}; +static const q LOGC[23]={ + {{0x5555555555555555ull,0x3ffd555555555555ull}}, {{0x999999999999999aull,0x3ffc999999999999ull}}, {{0x2492492492492492ull,0x3ffc249249249249ull}}, {{0xc71c71c71c71c71cull,0x3ffbc71c71c71c71ull}}, {{0x5d1745d1745d1746ull,0x3ffb745d1745d174ull}}, {{0x3b13b13b13b13b14ull,0x3ffb3b13b13b13b1ull}}, {{0x1111111111111111ull,0x3ffb111111111111ull}}, {{0xe1e1e1e1e1e1e1e2ull,0x3ffae1e1e1e1e1e1ull}}, {{0x86bca1af286bca1bull,0x3ffaaf286bca1af2ull}}, {{0x8618618618618618ull,0x3ffa861861861861ull}}, {{0x42c8590b21642c86ull,0x3ffa642c8590b216ull}}, {{0xae147ae147ae147bull,0x3ffa47ae147ae147ull}}, {{0x84bda12f684bda13ull,0x3ffa2f684bda12f6ull}}, {{0x611a7b9611a7b961ull,0x3ffa1a7b9611a7b9ull}}, {{0x4210842108421084ull,0x3ffa084210842108ull}}, {{0x7c1f07c1f07c1f08ull,0x3ff9f07c1f07c1f0ull}}, {{0xd41d41d41d41d41dull,0x3ff9d41d41d41d41ull}}, {{0xf914c1bacf914c1cull,0x3ff9bacf914c1bacull}}, {{0xa41a41a41a41a41aull,0x3ff9a41a41a41a41ull}}, {{0x9c18f9c18f9c18faull,0x3ff98f9c18f9c18full}}, {{0x417d05f417d05f41ull,0x3ff97d05f417d05full}}, {{0x6c16c16c16c16c17ull,0x3ff96c16c16c16c1ull}}, {{0x72620ae4c415c988ull,0x3ff95c9882b93105ull}} +}; +static const q SINC[16]={ + {{0x5555555555555555ull,0xbffc555555555555ull}}, {{0x1111111111111111ull,0x3ff8111111111111ull}}, {{0xa01a01a01a01a01aull,0xbff2a01a01a01a01ull}}, {{0x38faac1c88e50017ull,0x3fec71de3a556c73ull}}, {{0x38fe747e4b837dc7ull,0xbfe5ae64567f544eull}}, {{0x97ca38331d23af68ull,0x3fde6124613a86d0ull}}, {{0xf11d8656b0ee8cb0ull,0xbfd6ae7f3e733b81ull}}, {{0xa6b2605197771b00ull,0x3fce952c77030ad4ull}}, {{0x724ca1ec3b7b9675ull,0xbfc62f49b4681415ull}}, {{0x18bef146fcee6e45ull,0x3fbd71b8ef6dcf57ull}}, {{0x9d97b8704dd7f628ull,0xbfb4761b41316381ull}}, {{0x8d4e44a419776f11ull,0x3fab3f3ccdd165faull}}, {{0x320a9a18f15d4277ull,0xbfa1d1ab1c2dcceaull}}, {{0xd7abe30e7766f129ull,0x3f98259f98b4358aull}}, {{0xc42e1ee46fa6bfc4ull,0xbf8e434d2e783f5bull}}, {{0x1b5382cdffa97422ull,0x3f843981254dd0d5ull}} +}; +static const q COSC[16]={ + {{0x5555555555555555ull,0x3ffa555555555555ull}}, {{0x6c16c16c16c16c17ull,0xbff56c16c16c16c1ull}}, {{0xa01a01a01a01a01aull,0x3fefa01a01a01a01ull}}, {{0xc72ef016d3ea6679ull,0xbfe927e4fb7789f5ull}}, {{0x7b544da987acfe85ull,0x3fe21eed8eff8d89ull}}, {{0xd20badf145dfa3e5ull,0xbfda93974a8c07c9ull}}, {{0xf11d8656b0ee8cb0ull,0x3fd2ae7f3e733b81ull}}, {{0x77bb004886a2c2abull,0xbfca6827863b97d9ull}}, {{0x507a9cad2bf8f0bbull,0x3fc1e542ba402022ull}}, {{0x29450c90b7f338ecull,0xbfb90ce396db7f85ull}}, {{0x7cca4b4067ca9d8aull,0x3faff2cf01972f57ull}}, {{0x9a38f2050ba6b015ull,0xbfa688e85fc6a4e5ull}}, {{0xd373c5c51c354a8dull,0x3f9d0a18a2635085ull}}, {{0xe60caded4c2989c5ull,0xbf933932c5047d60ull}}, {{0xc42e1ee46fa6bfc4ull,0x3f89434d2e783f5bull}}, {{0xa13f8a2b4af9d6b7ull,0xbf7f2710231c0fd7ull}} +}; +static q expq(q x){ + q t=qadd(qmul(x,LOG2E),HALF); int64_t k=f128M_to_i64(&t,softfloat_round_min,false); q qk=qi(k); + q r=qsub(qsub(x,qmul(qk,LN2HI)),qmul(qk,LN2LO)); + q p=EXC[24]; for(int i=23;i>=0;i--) p=qadd(qmul(p,r),EXC[i]); return qmul(p,q2k((int)k));} +static q logq(q x){ + uint64_t hi=x.v[1]; int ef=((hi>>48)&0x7fff)-16383; + q m; m.v[0]=x.v[0]; m.v[1]=(hi&0xffffffffffffULL)|(16383ULL<<48); + if(f128M_le(&SQRT2,&m)){ m=qmul(m,HALF); ef++; } + q f=qsub(m,ONE), s=qdiv(f,qadd(m,ONE)), z=qmul(s,s); + q p2=LOGC[22]; for(int i=21;i>=0;i--) p2=qadd(qmul(p2,z),LOGC[i]); + q r=qmul(qadd(z,z),p2), l1=qsub(f,qmul(s,qsub(f,r))), e=qi(ef); + return qadd(qmul(e,LN2HI),qadd(l1,qmul(e,LN2LO)));} +static q ksinq(q x,q y){ + q z=qmul(x,x); + q r=SINC[15]; for(int i=14;i>=1;i--) r=qadd(qmul(r,z),SINC[i]); + q v=qmul(z,x), aa=qsub(qmul(HALF,y),qmul(v,r)), bb=qsub(qmul(z,aa),y), cc=qsub(bb,qmul(v,SINC[0])); + return qsub(x,cc);} +static q kcosq(q x,q y){ + q z=qmul(x,x); + q rc=COSC[15]; for(int i=14;i>=0;i--) rc=qadd(qmul(rc,z),COSC[i]); + q hz=qmul(HALF,z), w2=qsub(ONE,hz); + q aa=qsub(qsub(ONE,w2),hz), bb=qsub(qmul(qmul(z,z),rc),qmul(x,y)); + return qadd(w2,qadd(aa,bb));} +static void reduce(q ax,int64_t*qq,q*rhi,q*rlo){ + q t=qadd(qmul(ax,INVPIO2),HALF); int64_t k=f128M_to_i64(&t,softfloat_round_min,false); + q qk=qi(k), hp=qsub(ax,qmul(qk,PIO2_1)), w=qmul(qk,PIO2_1T); + *qq=k; *rhi=qsub(hp,w); *rlo=qsub(qsub(hp,*rhi),w);} +static q sinq(q x){ + int neg=x.v[1]>>63; q ax=x; ax.v[1]&=0x7fffffffffffffffULL; + int64_t k; q rhi,rlo; reduce(ax,&k,&rhi,&rlo); int m=((int)k)&3; + q ks=ksinq(rhi,rlo), kc=kcosq(rhi,rlo); + q v = m==0?ks : m==1?kc : m==2?negq(ks):negq(kc); + return neg?negq(v):v;} +static q cosq(q x){ + q ax=x; ax.v[1]&=0x7fffffffffffffffULL; + int64_t k; q rhi,rlo; reduce(ax,&k,&rhi,&rlo); int m=((int)k)&3; + q ks=ksinq(rhi,rlo), kc=kcosq(rhi,rlo); + return m==0?kc : m==1?negq(ks) : m==2?negq(kc):ks;} +int main(void){ + softfloat_roundingMode=softfloat_round_near_even; + mpfr_t tr; mpfr_init2(tr,300); double we=0,wl=0,ws=0,wc=0,wq=0; q inv128=q2k(-7); + for(int i=-2560;i<=2560;i++){ q x=qmul(qi(i),inv128); q g=expq(x); + mpfr_set_si(tr,i,MPFR_RNDN); mpfr_div_ui(tr,tr,128,MPFR_RNDN); mpfr_exp(tr,tr,MPFR_RNDN); + double e=ulp_err(g,tr); if(e>we)we=e; } + for(int i=1;i<=25600;i++){ q x=qmul(qi(i),inv128); q g=logq(x); + mpfr_set_si(tr,i,MPFR_RNDN); mpfr_div_ui(tr,tr,128,MPFR_RNDN); mpfr_log(tr,tr,MPFR_RNDN); if(i==128)continue; + double e=ulp_err(g,tr); if(e>wl)wl=e; } + for(int i=-12800;i<=12800;i++){ q x=qmul(qi(i),inv128); + mpfr_set_si(tr,i,MPFR_RNDN); mpfr_div_ui(tr,tr,128,MPFR_RNDN); + mpfr_t ts,tc; mpfr_init2(ts,300); mpfr_init2(tc,300); mpfr_sin(ts,tr,MPFR_RNDN); mpfr_cos(tc,tr,MPFR_RNDN); + double es=mpfr_get_d(ts,MPFR_RNDN), ec=mpfr_get_d(tc,MPFR_RNDN); + if(es>1e-9||es<-1e-9){double e=ulp_err(sinq(x),ts); if(e>ws)ws=e;} + if(ec>1e-9||ec<-1e-9){double e=ulp_err(cosq(x),tc); if(e>wc)wc=e;} + mpfr_clear(ts); mpfr_clear(tc); } + for(int i=1;i<=25600;i++){ q x=qmul(qi(i),inv128); q g=qsqt(x); + mpfr_set_si(tr,i,MPFR_RNDN); mpfr_div_ui(tr,tr,128,MPFR_RNDN); mpfr_sqrt(tr,tr,MPFR_RNDN); + double e=ulp_err(g,tr); if(e>wq)wq=e; } + printf("# rq: exp %.3f log %.3f sin %.3f cos %.3f sqrt %.3f ULP\n",we,wl,ws,wc,wq); + qprint("sin(1) = ",sinq(qi(1))); + qprint("cos(1) = ",cosq(qi(1))); + qprint("sqrt(2) = ",qsqt(qi(2))); + mpfr_clear(tr); return 0;}