Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 31 additions & 17 deletions src/include/ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,11 @@ quad_pow(const Sleef_quad *a, const Sleef_quad *b)
inline Sleef_quad
quad_mod(const Sleef_quad *a, const Sleef_quad *b)
{
// division by zero
// Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified,
// and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns
// signbit=1, other libms may return signbit=0). We return our canonical
// NaN unconditionally; callers comparing against numpy should use isnan,
// not signbit, for this branch.
if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
return QUAD_PRECISION_NAN;
}
Expand Down Expand Up @@ -640,22 +644,27 @@ quad_mod(const Sleef_quad *a, const Sleef_quad *b)
// return a if sign_a == sign_b
return (sign_a == sign_b) ? *a : *b;
}

// In floor(a/b) the a/b gives the rounded quotient, off by at most 0.5ULP.
// But "0.5 ulp of the quotient" can correspond to a huge absolute error when the quotient itself is huge.
// hence, taking the fmod path
Sleef_quad result = Sleef_fmodq1(*a, *b);
// if result != 0 and sign(result) != sign(b)
if (!Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO) && (quad_signbit(&result) != quad_signbit(b)))
{
/* Interesting thing, as per "Sterbenz lemma" (https://en.wikipedia.org/wiki/Sterbenz_lemma):
|b|/2 ≤ |result| < |b| => result is exact, and thus we can add b without losing precision
|result| < |b|/2 => inexact but bounded by 0.5 ulp

// NumPy mod formula: a % b = a - floor(a/b) * b
Sleef_quad quotient = Sleef_divq1_u05(*a, *b);
Sleef_quad floored = Sleef_floorq1(quotient);
Sleef_quad product = Sleef_mulq1_u05(floored, *b);
Sleef_quad result = Sleef_subq1_u05(*a, product);
In both the cases the final output will always within max of 0.5 ulp error (as documented by Sleef_addq1_u05) hence we will be in safe range.
*/
result = Sleef_addq1_u05(result, *b);
}

// Handle zero result sign: when result is exactly zero,
// it should have the same sign as the divisor (NumPy convention)
if (Sleef_icmpeqq1(result, QUAD_PRECISION_ZERO)) {
if (Sleef_icmpltq1(*b, QUAD_PRECISION_ZERO)) {
return Sleef_negq1(QUAD_PRECISION_ZERO); // -0.0
}
else {
return QUAD_PRECISION_ZERO; // +0.0
}
return Sleef_copysignq1(QUAD_PRECISION_ZERO, *b);
}

return result;
Expand All @@ -669,7 +678,11 @@ quad_fmod(const Sleef_quad *a, const Sleef_quad *b)
return Sleef_iunordq1(*a, *a) ? *a : *b;
}

// Division by zero -> NaN
// Division by zero -> NaN. IEEE 754 leaves the sign of a NaN unspecified,
// and libc fmod's NaN payload varies by platform (e.g. x86 glibc returns
// signbit=1, other libms may return signbit=0). We return our canonical
// NaN unconditionally; callers comparing against numpy should use isnan,
// not signbit, for this branch.
if (Sleef_icmpeqq1(*b, QUAD_PRECISION_ZERO)) {
return QUAD_PRECISION_NAN;
}
Expand Down Expand Up @@ -1255,12 +1268,13 @@ ld_mod(const long double *a, const long double *b)
return (sign_a == sign_b) ? *a : *b;
}

long double quotient = (*a) / (*b);
long double floored = floorl(quotient);
long double result = (*a) - floored * (*b);
long double result = fmodl(*a, *b);
if (result != 0.0L && signbit(result) != signbit(*b)) {
result += *b;
}

if (result == 0.0L) {
return (*b < 0.0L) ? -0.0L : 0.0L;
return copysignl(0.0L, *b);
}

return result;
Expand Down
64 changes: 64 additions & 0 deletions tests/test_quaddtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -2688,6 +2688,70 @@ def test_mod(a, b, backend, op):
assert result_negative == numpy_negative, f"Sign mismatch for {a} % {b}: quad={result_negative}, numpy={numpy_negative}"


@pytest.mark.parametrize("op", [np.mod, np.remainder])
@pytest.mark.parametrize("a_func,a_arg,b_str", [
# Regression: huge dividend, finite divisor. cosh(-11357.2...) is
# ~1.19e+4932, so |a/b| is enormous. The old `a - floor(a/b)*b` formula
# lost all precision here because 0.5 ulp of the quotient corresponds
# to a huge absolute error. The fmod-based path avoids this.
("cosh", "-11357.216553474703894801348310092223",
"-1.7976931345860068e+308"),
# Same with mismatched signs — exercises the Python-convention sign
# adjustment on a huge result.
("cosh", "-11357.216553474703894801348310092223",
"1.7976931345860068e+308"),
# Large quotient, small divisor.
("exp", "1000", "3.14159265358979323846264338327950288"),
])
def test_mod_high_precision_sleef(a_func, a_arg, b_str, op):
"""Sleef backend: verified against mpmath at binary128 precision.

These inputs lie far outside f64 range, so the standard test_mod path
cannot exercise them — mpmath is the only viable ground truth.
"""
mp.prec = 113
mp_a = getattr(mp, a_func)(mp.mpf(a_arg))
mp_b = mp.mpf(b_str)
# mp.fmod is Python-style (sign of divisor), matching np.mod.
mp_result = mp.fmod(mp_a, mp_b)
expected = QuadPrecision(mp.nstr(mp_result, 36))

quad_a = getattr(np, a_func)(QuadPrecision(a_arg))
quad_b = QuadPrecision(b_str)
quad_result = op(quad_a, quad_b)

assert np.isclose(quad_result, expected, rtol=1e-34, atol=0), (
f"sleef mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): "
f"quad={quad_result}, expected={expected}"
)


@pytest.mark.parametrize("op", [np.mod, np.remainder])
@pytest.mark.parametrize("a_func,a_arg,b_str", [
("exp", "700", "3.14159265358979323846264338327950288"),
])
def test_mod_high_precision_longdouble(a_func, a_arg, b_str, op):
"""Longdouble backend: cross-checked against numpy's np.longdouble.

The backend stores a C `long double`, the same type as np.longdouble,
so np.mod on np.longdouble inputs is a direct same-precision reference
and the result should match bit-for-bit.
"""
np_a = getattr(np, a_func)(np.longdouble(a_arg))
np_b = np.longdouble(b_str)
expected = op(np_a, np_b)

quad_a = getattr(np, a_func)(QuadPrecision(a_arg, backend="longdouble"))
quad_b = QuadPrecision(b_str, backend="longdouble")
quad_result = op(quad_a, quad_b)

# np.longdouble(QuadPrecision) is a lossless cast (same underlying type).
assert np.longdouble(quad_result) == expected, (
f"longdouble mismatch for {op.__name__}({a_func}({a_arg}), {b_str}): "
f"quad={np.longdouble(quad_result)!r}, expected={expected!r}"
)


@pytest.mark.parametrize("backend", ["sleef", "longdouble"])
@pytest.mark.parametrize("a,b", [
# Basic cases - positive/positive
Expand Down
Loading