From bcee399ab946e440597c8969267ccf72fbecd4d2 Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Fri, 15 May 2026 17:24:12 +0000
Subject: [PATCH 1/4] calculate mod using fmod

---
 src/include/ops.hpp     | 41 +++++++++++++++++++++++++----------
 tests/test_quaddtype.py | 47 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/src/include/ops.hpp b/src/include/ops.hpp
index 3d21799..b538f40 100644
--- a/src/include/ops.hpp
+++ b/src/include/ops.hpp
@@ -612,7 +612,11 @@ quad_pow(const Sleef_quad *a, const Sleef_quad *b)
 inline Sleef_quad
 quad_mod(const Sleef_quad *a, const Sleef_quad *b)
 {
-    // division by zero
+    // Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified,
+    // and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns
+    // signbit=1, other libms may return signbit=0). We return our canonical
+    // NaN unconditionally; callers comparing against numpy should use isnan,
+    // not signbit, for this branch.
     if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
         return QUAD_PRECISION_NAN;
     }
@@ -640,12 +644,22 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b)
         // return a if sign_a == sign_b
         return (sign_a == sign_b) ? *a : *b;
     }
+    
+    // In floor(a/b) the a/b gives the rounded quotient, off by at most 0.5ULP.
+    // But "0.5 ulp of the quotient" can correspond to a huge absolute error when the quotient itself is huge.
+    // hence, taking the fmod path
+    Sleef_quad result = Sleef_fmodq1(*a, *b);
+    // if result != 0 and sign(result) != sign(b)
+    if (!Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO) && (quad_signbit(&result) != quad_signbit(b))) 
+    {
+      /*  Interesting thing, as per "Sterbenz lemma" (https://en.wikipedia.org/wiki/Sterbenz_lemma):
+            |b|/2 ≤ |result| < |b| => result is exact, and thus we can add b without losing precision
+            |result| < |b|/2 => inexact but bounded by 0.5 ulp
 
-    // NumPy mod formula: a % b = a - floor(a/b) * b
-    Sleef_quad quotient = Sleef_divq1_u05(*a, *b);
-    Sleef_quad floored = Sleef_floorq1(quotient);
-    Sleef_quad product = Sleef_mulq1_u05(floored, *b);
-    Sleef_quad result = Sleef_subq1_u05(*a, product);
+          In both the cases the final output will always within max of 0.5 ulp error (as documented by Sleef_addq1_u05) hence we will be in safe range.
+      */
+      result = Sleef_addq1_u05(result, *b);
+    }
 
     // Handle zero result sign: when result is exactly zero,
     // it should have the same sign as the divisor (NumPy convention)
@@ -669,7 +683,11 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b)
         return Sleef_iunordq1(*a, *a) ? *a : *b;
     }
     
-    // Division by zero -> NaN
+    // Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified,
+    // and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns
+    // signbit=1, other libms may return signbit=0). We return our canonical
+    // NaN unconditionally; callers comparing against numpy should use isnan,
+    // not signbit, for this branch.
     if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
         return QUAD_PRECISION_NAN;
     }
@@ -1255,12 +1273,13 @@ ld_mod(const long double *a, const long double *b)
         return (sign_a == sign_b) ? *a : *b;
     }
 
-    long double quotient = (*a) / (*b);
-    long double floored = floorl(quotient);
-    long double result = (*a) - floored * (*b);
+    long double result = fmodl(*a, *b);
+    if (result != 0.0L && signbit(result) != signbit(*b)) {
+        result += *b;
+    }
 
     if (result == 0.0L) {
-        return (*b < 0.0L) ? -0.0L : 0.0L;
+        return copysignl(0.0L, *b);
     }
 
     return result;
diff --git a/tests/test_quaddtype.py b/tests/test_quaddtype.py
index 58a8b7d..d38c3c0 100644
--- a/tests/test_quaddtype.py
+++ b/tests/test_quaddtype.py
@@ -2688,6 +2688,53 @@ def test_mod(a, b, backend, op):
         assert result_negative == numpy_negative, f"Sign mismatch for {a} % {b}: quad={result_negative}, numpy={numpy_negative}"
 
 
+@pytest.mark.parametrize("backend", ["sleef", "longdouble"])
+@pytest.mark.parametrize("op", [np.mod, np.remainder])
+@pytest.mark.parametrize("a_func,a_arg,b_str", [
+    # Regression: huge dividend, finite divisor. cosh(-11357.2...) overflows
+    # f64 by thousands of orders of magnitude, so |a/b| is enormous. The old
+    # `a - floor(a/b)*b` formula loses precision here because 0.5 ulp of the
+    # quotient corresponds to a huge absolute error. The fmod-based path
+    # avoids this. The cosh value (~1.19e+4932) also exceeds LDBL_MAX,
+    # so longdouble is skipped for these cases.
+    ("cosh", "-11357.216553474703894801348310092223",
+     "-1.7976931345860068e+308"),
+    ("cosh", "-11357.216553474703894801348310092223",
+     "1.7976931345860068e+308"),
+    # Large quotient, small divisor — within long double range so both
+    # backends can be tested.
+    ("exp", "1000", "3.14159265358979323846264338327950288"),
+])
+def test_mod_high_precision(a_func, a_arg, b_str, op, backend):
+    """Regression: mod must stay accurate when |a/b| is astronomically large.
+
+    Verified against mpmath; numpy f64 cannot represent these inputs, so the
+    standard test_mod path doesn't catch this. Precision of the comparison
+    is chosen to match the backend's working precision.
+    """
+    np_func = getattr(np, a_func)
+    quad_a = np_func(QuadPrecision(a_arg, backend=backend))
+    if not np.isfinite(quad_a):
+        pytest.skip(f"{a_func}({a_arg}) overflows the {backend} backend")
+    quad_b = QuadPrecision(b_str, backend=backend)
+
+    # mpmath ground truth at the backend's precision (sleef = 113-bit
+    # binary128, longdouble = typically 64-bit x86 extended on Linux).
+    mp.prec = 113 if backend == "sleef" else 64
+    mp_func = getattr(mp, a_func)
+    mp_a = mp_func(mp.mpf(a_arg))
+    mp_b = mp.mpf(b_str)
+    # mp.fmod follows Python convention (sign of divisor), matching np.mod.
+    mp_result = mp.fmod(mp_a, mp_b)
+    expected = QuadPrecision(mp.nstr(mp_result, 36), backend=backend)
+    quad_result = op(quad_a, quad_b)
+    rtol = 1e-32 if backend == "sleef" else 1e-18
+    assert np.isclose(quad_result, expected, rtol=rtol, atol=0), (
+        f"High-precision mismatch for {op.__name__}({a_func}({a_arg}), {b_str}) "
+        f"[{backend}]: quad={quad_result}, expected={expected}"
+    )
+
+
 @pytest.mark.parametrize("backend", ["sleef", "longdouble"])
 @pytest.mark.parametrize("a,b", [
     # Basic cases - positive/positive

From 915bb32de46292f35bb72f7f92e03b61b6f7cad4 Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Fri, 15 May 2026 17:40:26 +0000
Subject: [PATCH 2/4] use numpy as gt for ld

---
 tests/test_quaddtype.py | 79 +++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 31 deletions(-)

diff --git a/tests/test_quaddtype.py b/tests/test_quaddtype.py
index d38c3c0..f538107 100644
--- a/tests/test_quaddtype.py
+++ b/tests/test_quaddtype.py
@@ -2688,50 +2688,67 @@ def test_mod(a, b, backend, op):
         assert result_negative == numpy_negative, f"Sign mismatch for {a} % {b}: quad={result_negative}, numpy={numpy_negative}"
 
 
-@pytest.mark.parametrize("backend", ["sleef", "longdouble"])
 @pytest.mark.parametrize("op", [np.mod, np.remainder])
 @pytest.mark.parametrize("a_func,a_arg,b_str", [
-    # Regression: huge dividend, finite divisor. cosh(-11357.2...) overflows
-    # f64 by thousands of orders of magnitude, so |a/b| is enormous. The old
-    # `a - floor(a/b)*b` formula loses precision here because 0.5 ulp of the
-    # quotient corresponds to a huge absolute error. The fmod-based path
-    # avoids this. The cosh value (~1.19e+4932) also exceeds LDBL_MAX,
-    # so longdouble is skipped for these cases.
+    # Regression: huge dividend, finite divisor. cosh(-11357.2...) is
+    # ~1.19e+4932, so |a/b| is enormous. The old `a - floor(a/b)*b` formula
+    # lost all precision here because 0.5 ulp of the quotient corresponds
+    # to a huge absolute error. The fmod-based path avoids this.
     ("cosh", "-11357.216553474703894801348310092223",
      "-1.7976931345860068e+308"),
+    # Same with mismatched signs — exercises the Python-convention sign
+    # adjustment on a huge result.
     ("cosh", "-11357.216553474703894801348310092223",
      "1.7976931345860068e+308"),
-    # Large quotient, small divisor — within long double range so both
-    # backends can be tested.
+    # Large quotient, small divisor.
     ("exp", "1000", "3.14159265358979323846264338327950288"),
 ])
-def test_mod_high_precision(a_func, a_arg, b_str, op, backend):
-    """Regression: mod must stay accurate when |a/b| is astronomically large.
+def test_mod_high_precision_sleef(a_func, a_arg, b_str, op):
+    """Sleef backend: verified against mpmath at binary128 precision.
 
-    Verified against mpmath; numpy f64 cannot represent these inputs, so the
-    standard test_mod path doesn't catch this. Precision of the comparison
-    is chosen to match the backend's working precision.
+    These inputs lie far outside f64 range, so the standard test_mod path
+    cannot exercise them — mpmath is the only viable ground truth.
     """
-    np_func = getattr(np, a_func)
-    quad_a = np_func(QuadPrecision(a_arg, backend=backend))
-    if not np.isfinite(quad_a):
-        pytest.skip(f"{a_func}({a_arg}) overflows the {backend} backend")
-    quad_b = QuadPrecision(b_str, backend=backend)
-
-    # mpmath ground truth at the backend's precision (sleef = 113-bit
-    # binary128, longdouble = typically 64-bit x86 extended on Linux).
-    mp.prec = 113 if backend == "sleef" else 64
-    mp_func = getattr(mp, a_func)
-    mp_a = mp_func(mp.mpf(a_arg))
+    mp.prec = 113
+    mp_a = getattr(mp, a_func)(mp.mpf(a_arg))
     mp_b = mp.mpf(b_str)
-    # mp.fmod follows Python convention (sign of divisor), matching np.mod.
+    # mp.fmod is Python-style (sign of divisor), matching np.mod.
     mp_result = mp.fmod(mp_a, mp_b)
-    expected = QuadPrecision(mp.nstr(mp_result, 36), backend=backend)
+    expected = QuadPrecision(mp.nstr(mp_result, 36))
+
+    quad_a = getattr(np, a_func)(QuadPrecision(a_arg))
+    quad_b = QuadPrecision(b_str)
+    quad_result = op(quad_a, quad_b)
+
+    assert np.isclose(quad_result, expected, rtol=1e-34, atol=0), (
+        f"sleef mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): "
+        f"quad={quad_result}, expected={expected}"
+    )
+
+
+@pytest.mark.parametrize("op", [np.mod, np.remainder])
+@pytest.mark.parametrize("a_func,a_arg,b_str", [
+    ("exp", "1000", "3.14159265358979323846264338327950288"),
+])
+def test_mod_high_precision_longdouble(a_func, a_arg, b_str, op):
+    """Longdouble backend: cross-checked against numpy's np.longdouble.
+
+    The backend stores a C `long double`, the same type as np.longdouble,
+    so np.mod on np.longdouble inputs is a direct same-precision reference
+    and the result should match bit-for-bit.
+    """
+    np_a = getattr(np, a_func)(np.longdouble(a_arg))
+    np_b = np.longdouble(b_str)
+    expected = op(np_a, np_b)
+
+    quad_a = getattr(np, a_func)(QuadPrecision(a_arg, backend="longdouble"))
+    quad_b = QuadPrecision(b_str, backend="longdouble")
     quad_result = op(quad_a, quad_b)
-    rtol = 1e-32 if backend == "sleef" else 1e-18
-    assert np.isclose(quad_result, expected, rtol=rtol, atol=0), (
-        f"High-precision mismatch for {op.__name__}({a_func}({a_arg}), {b_str}) "
-        f"[{backend}]: quad={quad_result}, expected={expected}"
+
+    # np.longdouble(QuadPrecision) is a lossless cast (same underlying type).
+    assert np.longdouble(quad_result) == expected, (
+        f"longdouble mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): "
+        f"quad={np.longdouble(quad_result)!r}, expected={expected!r}"
     )
 
 

From f441dcd5c24643b72279a3c8a32252f2ac1bd000 Mon Sep 17 00:00:00 2001
From: SwayamInSync <hawkempire007@gmail.com>
Date: Fri, 15 May 2026 17:56:52 +0000
Subject: [PATCH 3/4] exp(1000) -> exp(700) for ld backend in tests

---
 tests/test_quaddtype.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_quaddtype.py b/tests/test_quaddtype.py
index f538107..2499136 100644
--- a/tests/test_quaddtype.py
+++ b/tests/test_quaddtype.py
@@ -2728,7 +2728,7 @@ def test_mod_high_precision_sleef(a_func, a_arg, b_str, op):
 
 @pytest.mark.parametrize("op", [np.mod, np.remainder])
 @pytest.mark.parametrize("a_func,a_arg,b_str", [
-    ("exp", "1000", "3.14159265358979323846264338327950288"),
+    ("exp", "700", "3.14159265358979323846264338327950288"),
 ])
 def test_mod_high_precision_longdouble(a_func, a_arg, b_str, op):
     """Longdouble backend: cross-checked against numpy's np.longdouble.

From 424252c9b63b867c89e16fc66d30b861fac982f5 Mon Sep 17 00:00:00 2001
From: swayaminsync <hawkempire007@gmail.com>
Date: Sat, 16 May 2026 12:36:15 +0530
Subject: [PATCH 4/4] copysign for zero mod res in sleef

---
 src/include/ops.hpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/include/ops.hpp b/src/include/ops.hpp
index b538f40..2fd9299 100644
--- a/src/include/ops.hpp
+++ b/src/include/ops.hpp
@@ -664,12 +664,7 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b)
     // Handle zero result sign: when result is exactly zero,
     // it should have the same sign as the divisor (NumPy convention)
     if (Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO)) {
-        if (Sleef_icmpltq1(*b, QUAD_PRECISION_ZERO)) {
-            return Sleef_negq1(QUAD_PRECISION_ZERO);  // -0.0
-        }
-        else {
-            return QUAD_PRECISION_ZERO;  // +0.0
-        }
+        return Sleef_copysignq1(QUAD_PRECISION_ZERO, *b);
     }
 
     return result;