numpy · SwayamInSync · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 16, 2026
diff --git a/src/include/ops.hpp b/src/include/ops.hpp
@@ -612,7 +612,11 @@ quad_pow(const Sleef_quad *a, const Sleef_quad *b)
 inline Sleef_quad
 quad_mod(const Sleef_quad *a, const Sleef_quad *b)
 {
-    // division by zero
+    // Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified,
+    // and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns
+    // signbit=1, other libms may return signbit=0). We return our canonical
+    // NaN unconditionally; callers comparing against numpy should use isnan,
+    // not signbit, for this branch.
     if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
         return QUAD_PRECISION_NAN;
     }
@@ -640,22 +644,27 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b)
         // return a if sign_a == sign_b
         return (sign_a == sign_b) ? *a : *b;
     }
+
+    // In floor(a/b) the a/b gives the rounded quotient, off by at most 0.5ULP.
+    // But "0.5 ulp of the quotient" can correspond to a huge absolute error when the quotient itself is huge.
+    // hence, taking the fmod path
+    Sleef_quad result = Sleef_fmodq1(*a, *b);
+    // if result != 0 and sign(result) != sign(b)
+    if (!Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO) && (quad_signbit(&result) != quad_signbit(b))) 
+    {
+      /*  Interesting thing, as per "Sterbenz lemma" (https://en.wikipedia.org/wiki/Sterbenz_lemma):
+            |b|/2 ≤ |result| < |b| => result is exact, and thus we can add b without losing precision
+            |result| < |b|/2 => inexact but bounded by 0.5 ulp
 
-    // NumPy mod formula: a % b = a - floor(a/b) * b
-    Sleef_quad quotient = Sleef_divq1_u05(*a, *b);
-    Sleef_quad floored = Sleef_floorq1(quotient);
-    Sleef_quad product = Sleef_mulq1_u05(floored, *b);
-    Sleef_quad result = Sleef_subq1_u05(*a, product);
+          In both the cases the final output will always within max of 0.5 ulp error (as documented by Sleef_addq1_u05) hence we will be in safe range.
+      */
+      result = Sleef_addq1_u05(result, *b);
+    }
 
     // Handle zero result sign: when result is exactly zero,
     // it should have the same sign as the divisor (NumPy convention)
     if (Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO)) {
-        if (Sleef_icmpltq1(*b, QUAD_PRECISION_ZERO)) {
-            return Sleef_negq1(QUAD_PRECISION_ZERO);  // -0.0
-        }
-        else {
-            return QUAD_PRECISION_ZERO;  // +0.0
-        }
+        return Sleef_copysignq1(QUAD_PRECISION_ZERO, *b);
     }
 
     return result;
@@ -669,7 +678,11 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b)
         return Sleef_iunordq1(*a, *a) ? *a : *b;
     }
 
-    // Division by zero -> NaN
+    // Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified,
+    // and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns
+    // signbit=1, other libms may return signbit=0). We return our canonical
+    // NaN unconditionally; callers comparing against numpy should use isnan,
+    // not signbit, for this branch.
     if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
         return QUAD_PRECISION_NAN;
     }
@@ -1255,12 +1268,13 @@ ld_mod(const long double *a, const long double *b)
         return (sign_a == sign_b) ? *a : *b;
     }
 
-    long double quotient = (*a) / (*b);
-    long double floored = floorl(quotient);
-    long double result = (*a) - floored * (*b);
+    long double result = fmodl(*a, *b);
+    if (result != 0.0L && signbit(result) != signbit(*b)) {
+        result += *b;
+    }
 
     if (result == 0.0L) {
-        return (*b < 0.0L) ? -0.0L : 0.0L;
+        return copysignl(0.0L, *b);
     }
 
     return result;

diff --git a/tests/test_quaddtype.py b/tests/test_quaddtype.py
@@ -2688,6 +2688,70 @@ def test_mod(a, b, backend, op):
         assert result_negative == numpy_negative, f"Sign mismatch for {a} % {b}: quad={result_negative}, numpy={numpy_negative}"
 
 
+@pytest.mark.parametrize("op", [np.mod, np.remainder])
+@pytest.mark.parametrize("a_func,a_arg,b_str", [
+    # Regression: huge dividend, finite divisor. cosh(-11357.2...) is
+    # ~1.19e+4932, so |a/b| is enormous. The old `a - floor(a/b)*b` formula
+    # lost all precision here because 0.5 ulp of the quotient corresponds
+    # to a huge absolute error. The fmod-based path avoids this.
+    ("cosh", "-11357.216553474703894801348310092223",
+     "-1.7976931345860068e+308"),
+    # Same with mismatched signs — exercises the Python-convention sign
+    # adjustment on a huge result.
+    ("cosh", "-11357.216553474703894801348310092223",
+     "1.7976931345860068e+308"),
+    # Large quotient, small divisor.
+    ("exp", "1000", "3.14159265358979323846264338327950288"),
+])
+def test_mod_high_precision_sleef(a_func, a_arg, b_str, op):
+    """Sleef backend: verified against mpmath at binary128 precision.
+
+    These inputs lie far outside f64 range, so the standard test_mod path
+    cannot exercise them — mpmath is the only viable ground truth.
+    """
+    mp.prec = 113
+    mp_a = getattr(mp, a_func)(mp.mpf(a_arg))
+    mp_b = mp.mpf(b_str)
+    # mp.fmod is Python-style (sign of divisor), matching np.mod.
+    mp_result = mp.fmod(mp_a, mp_b)
+    expected = QuadPrecision(mp.nstr(mp_result, 36))
+
+    quad_a = getattr(np, a_func)(QuadPrecision(a_arg))
+    quad_b = QuadPrecision(b_str)
+    quad_result = op(quad_a, quad_b)
+
+    assert np.isclose(quad_result, expected, rtol=1e-34, atol=0), (
+        f"sleef mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): "
+        f"quad={quad_result}, expected={expected}"
+    )
+
+
+@pytest.mark.parametrize("op", [np.mod, np.remainder])
+@pytest.mark.parametrize("a_func,a_arg,b_str", [
+    ("exp", "700", "3.14159265358979323846264338327950288"),
+])
+def test_mod_high_precision_longdouble(a_func, a_arg, b_str, op):
+    """Longdouble backend: cross-checked against numpy's np.longdouble.
+
+    The backend stores a C `long double`, the same type as np.longdouble,
+    so np.mod on np.longdouble inputs is a direct same-precision reference
+    and the result should match bit-for-bit.
+    """
+    np_a = getattr(np, a_func)(np.longdouble(a_arg))
+    np_b = np.longdouble(b_str)
+    expected = op(np_a, np_b)
+
+    quad_a = getattr(np, a_func)(QuadPrecision(a_arg, backend="longdouble"))
+    quad_b = QuadPrecision(b_str, backend="longdouble")
+    quad_result = op(quad_a, quad_b)
+
+    # np.longdouble(QuadPrecision) is a lossless cast (same underlying type).
+    assert np.longdouble(quad_result) == expected, (
+        f"longdouble mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): "
+        f"quad={np.longdouble(quad_result)!r}, expected={expected!r}"
+    )
+
+
 @pytest.mark.parametrize("backend", ["sleef", "longdouble"])
 @pytest.mark.parametrize("a,b", [
     # Basic cases - positive/positive