diff --git a/src/include/ops.hpp b/src/include/ops.hpp index 3d21799..2fd9299 100644 --- a/src/include/ops.hpp +++ b/src/include/ops.hpp @@ -612,7 +612,11 @@ quad_pow(const Sleef_quad *a, const Sleef_quad *b) inline Sleef_quad quad_mod(const Sleef_quad *a, const Sleef_quad *b) { - // division by zero + // Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified, + // and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns + // signbit=1, other libms may return signbit=0). We return our canonical + // NaN unconditionally; callers comparing against numpy should use isnan, + // not signbit, for this branch. if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) { return QUAD_PRECISION_NAN; } @@ -640,22 +644,27 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b) // return a if sign_a == sign_b return (sign_a == sign_b) ? *a : *b; } + + // In floor(a/b) the a/b gives the rounded quotient, off by at most 0.5ULP. + // But "0.5 ulp of the quotient" can correspond to a huge absolute error when the quotient itself is huge. + // hence, taking the fmod path + Sleef_quad result = Sleef_fmodq1(*a, *b); + // if result != 0 and sign(result) != sign(b) + if (!Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO) && (quad_signbit(&result) != quad_signbit(b))) + { + /* Interesting thing, as per "Sterbenz lemma" (https://en.wikipedia.org/wiki/Sterbenz_lemma): + |b|/2 ≤ |result| < |b| => result is exact, and thus we can add b without losing precision + |result| < |b|/2 => inexact but bounded by 0.5 ulp - // NumPy mod formula: a % b = a - floor(a/b) * b - Sleef_quad quotient = Sleef_divq1_u05(*a, *b); - Sleef_quad floored = Sleef_floorq1(quotient); - Sleef_quad product = Sleef_mulq1_u05(floored, *b); - Sleef_quad result = Sleef_subq1_u05(*a, product); + In both the cases the final output will always within max of 0.5 ulp error (as documented by Sleef_addq1_u05) hence we will be in safe range. + */ + result = Sleef_addq1_u05(result, *b); + } // Handle zero result sign: when result is exactly zero, // it should have the same sign as the divisor (NumPy convention) if (Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO)) { - if (Sleef_icmpltq1(*b, QUAD_PRECISION_ZERO)) { - return Sleef_negq1(QUAD_PRECISION_ZERO); // -0.0 - } - else { - return QUAD_PRECISION_ZERO; // +0.0 - } + return Sleef_copysignq1(QUAD_PRECISION_ZERO, *b); } return result; @@ -669,7 +678,11 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b) return Sleef_iunordq1(*a, *a) ? *a : *b; } - // Division by zero -> NaN + // Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified, + // and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns + // signbit=1, other libms may return signbit=0). We return our canonical + // NaN unconditionally; callers comparing against numpy should use isnan, + // not signbit, for this branch. if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) { return QUAD_PRECISION_NAN; } @@ -1255,12 +1268,13 @@ ld_mod(const long double *a, const long double *b) return (sign_a == sign_b) ? *a : *b; } - long double quotient = (*a) / (*b); - long double floored = floorl(quotient); - long double result = (*a) - floored * (*b); + long double result = fmodl(*a, *b); + if (result != 0.0L && signbit(result) != signbit(*b)) { + result += *b; + } if (result == 0.0L) { - return (*b < 0.0L) ? -0.0L : 0.0L; + return copysignl(0.0L, *b); } return result; diff --git a/tests/test_quaddtype.py b/tests/test_quaddtype.py index 58a8b7d..2499136 100644 --- a/tests/test_quaddtype.py +++ b/tests/test_quaddtype.py @@ -2688,6 +2688,70 @@ def test_mod(a, b, backend, op): assert result_negative == numpy_negative, f"Sign mismatch for {a} % {b}: quad={result_negative}, numpy={numpy_negative}" +@pytest.mark.parametrize("op", [np.mod, np.remainder]) +@pytest.mark.parametrize("a_func,a_arg,b_str", [ + # Regression: huge dividend, finite divisor. cosh(-11357.2...) is + # ~1.19e+4932, so |a/b| is enormous. The old `a - floor(a/b)*b` formula + # lost all precision here because 0.5 ulp of the quotient corresponds + # to a huge absolute error. The fmod-based path avoids this. + ("cosh", "-11357.216553474703894801348310092223", + "-1.7976931345860068e+308"), + # Same with mismatched signs — exercises the Python-convention sign + # adjustment on a huge result. + ("cosh", "-11357.216553474703894801348310092223", + "1.7976931345860068e+308"), + # Large quotient, small divisor. + ("exp", "1000", "3.14159265358979323846264338327950288"), +]) +def test_mod_high_precision_sleef(a_func, a_arg, b_str, op): + """Sleef backend: verified against mpmath at binary128 precision. + + These inputs lie far outside f64 range, so the standard test_mod path + cannot exercise them — mpmath is the only viable ground truth. + """ + mp.prec = 113 + mp_a = getattr(mp, a_func)(mp.mpf(a_arg)) + mp_b = mp.mpf(b_str) + # mp.fmod is Python-style (sign of divisor), matching np.mod. + mp_result = mp.fmod(mp_a, mp_b) + expected = QuadPrecision(mp.nstr(mp_result, 36)) + + quad_a = getattr(np, a_func)(QuadPrecision(a_arg)) + quad_b = QuadPrecision(b_str) + quad_result = op(quad_a, quad_b) + + assert np.isclose(quad_result, expected, rtol=1e-34, atol=0), ( + f"sleef mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): " + f"quad={quad_result}, expected={expected}" + ) + + +@pytest.mark.parametrize("op", [np.mod, np.remainder]) +@pytest.mark.parametrize("a_func,a_arg,b_str", [ + ("exp", "700", "3.14159265358979323846264338327950288"), +]) +def test_mod_high_precision_longdouble(a_func, a_arg, b_str, op): + """Longdouble backend: cross-checked against numpy's np.longdouble. + + The backend stores a C `long double`, the same type as np.longdouble, + so np.mod on np.longdouble inputs is a direct same-precision reference + and the result should match bit-for-bit. + """ + np_a = getattr(np, a_func)(np.longdouble(a_arg)) + np_b = np.longdouble(b_str) + expected = op(np_a, np_b) + + quad_a = getattr(np, a_func)(QuadPrecision(a_arg, backend="longdouble")) + quad_b = QuadPrecision(b_str, backend="longdouble") + quad_result = op(quad_a, quad_b) + + # np.longdouble(QuadPrecision) is a lossless cast (same underlying type). + assert np.longdouble(quad_result) == expected, ( + f"longdouble mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): " + f"quad={np.longdouble(quad_result)!r}, expected={expected!r}" + ) + + @pytest.mark.parametrize("backend", ["sleef", "longdouble"]) @pytest.mark.parametrize("a,b", [ # Basic cases - positive/positive