From bcee399ab946e440597c8969267ccf72fbecd4d2 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Fri, 15 May 2026 17:24:12 +0000 Subject: [PATCH 1/4] calculate mod using fmod --- src/include/ops.hpp | 41 +++++++++++++++++++++++++---------- tests/test_quaddtype.py | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/src/include/ops.hpp b/src/include/ops.hpp index 3d21799..b538f40 100644 --- a/src/include/ops.hpp +++ b/src/include/ops.hpp @@ -612,7 +612,11 @@ quad_pow(const Sleef_quad *a, const Sleef_quad *b) inline Sleef_quad quad_mod(const Sleef_quad *a, const Sleef_quad *b) { - // division by zero + // Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified, + // and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns + // signbit=1, other libms may return signbit=0). We return our canonical + // NaN unconditionally; callers comparing against numpy should use isnan, + // not signbit, for this branch. if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) { return QUAD_PRECISION_NAN; } @@ -640,12 +644,22 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b) // return a if sign_a == sign_b return (sign_a == sign_b) ? *a : *b; } + + // In floor(a/b) the a/b gives the rounded quotient, off by at most 0.5ULP. + // But "0.5 ulp of the quotient" can correspond to a huge absolute error when the quotient itself is huge. + // hence, taking the fmod path + Sleef_quad result = Sleef_fmodq1(*a, *b); + // if result != 0 and sign(result) != sign(b) + if (!Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO) && (quad_signbit(&result) != quad_signbit(b))) + { + /* Interesting thing, as per "Sterbenz lemma" (https://en.wikipedia.org/wiki/Sterbenz_lemma): + |b|/2 ≤ |result| < |b| => result is exact, and thus we can add b without losing precision + |result| < |b|/2 => inexact but bounded by 0.5 ulp - // NumPy mod formula: a % b = a - floor(a/b) * b - Sleef_quad quotient = Sleef_divq1_u05(*a, *b); - Sleef_quad floored = Sleef_floorq1(quotient); - Sleef_quad product = Sleef_mulq1_u05(floored, *b); - Sleef_quad result = Sleef_subq1_u05(*a, product); + In both the cases the final output will always within max of 0.5 ulp error (as documented by Sleef_addq1_u05) hence we will be in safe range. + */ + result = Sleef_addq1_u05(result, *b); + } // Handle zero result sign: when result is exactly zero, // it should have the same sign as the divisor (NumPy convention) @@ -669,7 +683,11 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b) return Sleef_iunordq1(*a, *a) ? *a : *b; } - // Division by zero -> NaN + // Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified, + // and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns + // signbit=1, other libms may return signbit=0). We return our canonical + // NaN unconditionally; callers comparing against numpy should use isnan, + // not signbit, for this branch. if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) { return QUAD_PRECISION_NAN; } @@ -1255,12 +1273,13 @@ ld_mod(const long double *a, const long double *b) return (sign_a == sign_b) ? *a : *b; } - long double quotient = (*a) / (*b); - long double floored = floorl(quotient); - long double result = (*a) - floored * (*b); + long double result = fmodl(*a, *b); + if (result != 0.0L && signbit(result) != signbit(*b)) { + result += *b; + } if (result == 0.0L) { - return (*b < 0.0L) ? -0.0L : 0.0L; + return copysignl(0.0L, *b); } return result; diff --git a/tests/test_quaddtype.py b/tests/test_quaddtype.py index 58a8b7d..d38c3c0 100644 --- a/tests/test_quaddtype.py +++ b/tests/test_quaddtype.py @@ -2688,6 +2688,53 @@ def test_mod(a, b, backend, op): assert result_negative == numpy_negative, f"Sign mismatch for {a} % {b}: quad={result_negative}, numpy={numpy_negative}" +@pytest.mark.parametrize("backend", ["sleef", "longdouble"]) +@pytest.mark.parametrize("op", [np.mod, np.remainder]) +@pytest.mark.parametrize("a_func,a_arg,b_str", [ + # Regression: huge dividend, finite divisor. cosh(-11357.2...) overflows + # f64 by thousands of orders of magnitude, so |a/b| is enormous. The old + # `a - floor(a/b)*b` formula loses precision here because 0.5 ulp of the + # quotient corresponds to a huge absolute error. The fmod-based path + # avoids this. The cosh value (~1.19e+4932) also exceeds LDBL_MAX, + # so longdouble is skipped for these cases. + ("cosh", "-11357.216553474703894801348310092223", + "-1.7976931345860068e+308"), + ("cosh", "-11357.216553474703894801348310092223", + "1.7976931345860068e+308"), + # Large quotient, small divisor — within long double range so both + # backends can be tested. + ("exp", "1000", "3.14159265358979323846264338327950288"), +]) +def test_mod_high_precision(a_func, a_arg, b_str, op, backend): + """Regression: mod must stay accurate when |a/b| is astronomically large. + + Verified against mpmath; numpy f64 cannot represent these inputs, so the + standard test_mod path doesn't catch this. Precision of the comparison + is chosen to match the backend's working precision. + """ + np_func = getattr(np, a_func) + quad_a = np_func(QuadPrecision(a_arg, backend=backend)) + if not np.isfinite(quad_a): + pytest.skip(f"{a_func}({a_arg}) overflows the {backend} backend") + quad_b = QuadPrecision(b_str, backend=backend) + + # mpmath ground truth at the backend's precision (sleef = 113-bit + # binary128, longdouble = typically 64-bit x86 extended on Linux). + mp.prec = 113 if backend == "sleef" else 64 + mp_func = getattr(mp, a_func) + mp_a = mp_func(mp.mpf(a_arg)) + mp_b = mp.mpf(b_str) + # mp.fmod follows Python convention (sign of divisor), matching np.mod. + mp_result = mp.fmod(mp_a, mp_b) + expected = QuadPrecision(mp.nstr(mp_result, 36), backend=backend) + quad_result = op(quad_a, quad_b) + rtol = 1e-32 if backend == "sleef" else 1e-18 + assert np.isclose(quad_result, expected, rtol=rtol, atol=0), ( + f"High-precision mismatch for {op.__name__}({a_func}({a_arg}), {b_str}) " + f"[{backend}]: quad={quad_result}, expected={expected}" + ) + + @pytest.mark.parametrize("backend", ["sleef", "longdouble"]) @pytest.mark.parametrize("a,b", [ # Basic cases - positive/positive From 915bb32de46292f35bb72f7f92e03b61b6f7cad4 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Fri, 15 May 2026 17:40:26 +0000 Subject: [PATCH 2/4] use numpy as gt for ld --- tests/test_quaddtype.py | 79 +++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/tests/test_quaddtype.py b/tests/test_quaddtype.py index d38c3c0..f538107 100644 --- a/tests/test_quaddtype.py +++ b/tests/test_quaddtype.py @@ -2688,50 +2688,67 @@ def test_mod(a, b, backend, op): assert result_negative == numpy_negative, f"Sign mismatch for {a} % {b}: quad={result_negative}, numpy={numpy_negative}" -@pytest.mark.parametrize("backend", ["sleef", "longdouble"]) @pytest.mark.parametrize("op", [np.mod, np.remainder]) @pytest.mark.parametrize("a_func,a_arg,b_str", [ - # Regression: huge dividend, finite divisor. cosh(-11357.2...) overflows - # f64 by thousands of orders of magnitude, so |a/b| is enormous. The old - # `a - floor(a/b)*b` formula loses precision here because 0.5 ulp of the - # quotient corresponds to a huge absolute error. The fmod-based path - # avoids this. The cosh value (~1.19e+4932) also exceeds LDBL_MAX, - # so longdouble is skipped for these cases. + # Regression: huge dividend, finite divisor. cosh(-11357.2...) is + # ~1.19e+4932, so |a/b| is enormous. The old `a - floor(a/b)*b` formula + # lost all precision here because 0.5 ulp of the quotient corresponds + # to a huge absolute error. The fmod-based path avoids this. ("cosh", "-11357.216553474703894801348310092223", "-1.7976931345860068e+308"), + # Same with mismatched signs — exercises the Python-convention sign + # adjustment on a huge result. ("cosh", "-11357.216553474703894801348310092223", "1.7976931345860068e+308"), - # Large quotient, small divisor — within long double range so both - # backends can be tested. + # Large quotient, small divisor. ("exp", "1000", "3.14159265358979323846264338327950288"), ]) -def test_mod_high_precision(a_func, a_arg, b_str, op, backend): - """Regression: mod must stay accurate when |a/b| is astronomically large. +def test_mod_high_precision_sleef(a_func, a_arg, b_str, op): + """Sleef backend: verified against mpmath at binary128 precision. - Verified against mpmath; numpy f64 cannot represent these inputs, so the - standard test_mod path doesn't catch this. Precision of the comparison - is chosen to match the backend's working precision. + These inputs lie far outside f64 range, so the standard test_mod path + cannot exercise them — mpmath is the only viable ground truth. """ - np_func = getattr(np, a_func) - quad_a = np_func(QuadPrecision(a_arg, backend=backend)) - if not np.isfinite(quad_a): - pytest.skip(f"{a_func}({a_arg}) overflows the {backend} backend") - quad_b = QuadPrecision(b_str, backend=backend) - - # mpmath ground truth at the backend's precision (sleef = 113-bit - # binary128, longdouble = typically 64-bit x86 extended on Linux). - mp.prec = 113 if backend == "sleef" else 64 - mp_func = getattr(mp, a_func) - mp_a = mp_func(mp.mpf(a_arg)) + mp.prec = 113 + mp_a = getattr(mp, a_func)(mp.mpf(a_arg)) mp_b = mp.mpf(b_str) - # mp.fmod follows Python convention (sign of divisor), matching np.mod. + # mp.fmod is Python-style (sign of divisor), matching np.mod. mp_result = mp.fmod(mp_a, mp_b) - expected = QuadPrecision(mp.nstr(mp_result, 36), backend=backend) + expected = QuadPrecision(mp.nstr(mp_result, 36)) + + quad_a = getattr(np, a_func)(QuadPrecision(a_arg)) + quad_b = QuadPrecision(b_str) + quad_result = op(quad_a, quad_b) + + assert np.isclose(quad_result, expected, rtol=1e-34, atol=0), ( + f"sleef mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): " + f"quad={quad_result}, expected={expected}" + ) + + +@pytest.mark.parametrize("op", [np.mod, np.remainder]) +@pytest.mark.parametrize("a_func,a_arg,b_str", [ + ("exp", "1000", "3.14159265358979323846264338327950288"), +]) +def test_mod_high_precision_longdouble(a_func, a_arg, b_str, op): + """Longdouble backend: cross-checked against numpy's np.longdouble. + + The backend stores a C `long double`, the same type as np.longdouble, + so np.mod on np.longdouble inputs is a direct same-precision reference + and the result should match bit-for-bit. + """ + np_a = getattr(np, a_func)(np.longdouble(a_arg)) + np_b = np.longdouble(b_str) + expected = op(np_a, np_b) + + quad_a = getattr(np, a_func)(QuadPrecision(a_arg, backend="longdouble")) + quad_b = QuadPrecision(b_str, backend="longdouble") quad_result = op(quad_a, quad_b) - rtol = 1e-32 if backend == "sleef" else 1e-18 - assert np.isclose(quad_result, expected, rtol=rtol, atol=0), ( - f"High-precision mismatch for {op.__name__}({a_func}({a_arg}), {b_str}) " - f"[{backend}]: quad={quad_result}, expected={expected}" + + # np.longdouble(QuadPrecision) is a lossless cast (same underlying type). + assert np.longdouble(quad_result) == expected, ( + f"longdouble mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): " + f"quad={np.longdouble(quad_result)!r}, expected={expected!r}" ) From f441dcd5c24643b72279a3c8a32252f2ac1bd000 Mon Sep 17 00:00:00 2001 From: SwayamInSync Date: Fri, 15 May 2026 17:56:52 +0000 Subject: [PATCH 3/4] exp(1000) -> exp(700) for ld backend in tests --- tests/test_quaddtype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_quaddtype.py b/tests/test_quaddtype.py index f538107..2499136 100644 --- a/tests/test_quaddtype.py +++ b/tests/test_quaddtype.py @@ -2728,7 +2728,7 @@ def test_mod_high_precision_sleef(a_func, a_arg, b_str, op): @pytest.mark.parametrize("op", [np.mod, np.remainder]) @pytest.mark.parametrize("a_func,a_arg,b_str", [ - ("exp", "1000", "3.14159265358979323846264338327950288"), + ("exp", "700", "3.14159265358979323846264338327950288"), ]) def test_mod_high_precision_longdouble(a_func, a_arg, b_str, op): """Longdouble backend: cross-checked against numpy's np.longdouble. From 424252c9b63b867c89e16fc66d30b861fac982f5 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sat, 16 May 2026 12:36:15 +0530 Subject: [PATCH 4/4] copysign for zero mod res in sleef --- src/include/ops.hpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/include/ops.hpp b/src/include/ops.hpp index b538f40..2fd9299 100644 --- a/src/include/ops.hpp +++ b/src/include/ops.hpp @@ -664,12 +664,7 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b) // Handle zero result sign: when result is exactly zero, // it should have the same sign as the divisor (NumPy convention) if (Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO)) { - if (Sleef_icmpltq1(*b, QUAD_PRECISION_ZERO)) { - return Sleef_negq1(QUAD_PRECISION_ZERO); // -0.0 - } - else { - return QUAD_PRECISION_ZERO; // +0.0 - } + return Sleef_copysignq1(QUAD_PRECISION_ZERO, *b); } return result;