Skip to content

Commit 95a253a

Browse files
committed
fix heisenbugs
1 parent 9b4d83d commit 95a253a

3 files changed

Lines changed: 67 additions & 77 deletions

File tree

quaddtype/numpy_quaddtype/src/casts.cpp

Lines changed: 34 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ extern "C" {
2626
#include "lock.h"
2727
#include "dragon4.h"
2828
#include "ops.hpp"
29+
#include "constants.hpp"
2930

3031
#define NUM_CASTS 40 // 18 to_casts + 18 from_casts + 1 quad_to_quad + 1 void_to_quad
3132
#define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer
@@ -53,7 +54,7 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
5354
// Different backends require actual conversion, no view possible
5455
*view_offset = NPY_MIN_INTP;
5556
if (given_descrs[0]->backend == BACKEND_SLEEF) {
56-
return NPY_UNSAFE_CASTING; // SLEEF -> long double may lose precision
57+
return NPY_SAME_KIND_CASTING; // SLEEF -> long double may lose precision
5758
}
5859
// long double -> SLEEF preserves value exactly
5960
return static_cast<NPY_CASTING>(NPY_SAFE_CASTING | NPY_SAME_VALUE_CASTING_FLAG);
@@ -63,10 +64,11 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
6364
return NPY_NO_CASTING;
6465
}
6566

67+
template <bool Aligned>
6668
static int
67-
quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[],
68-
npy_intp const dimensions[], npy_intp const strides[],
69-
void *NPY_UNUSED(auxdata))
69+
quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
70+
npy_intp const dimensions[], npy_intp const strides[],
71+
void *NPY_UNUSED(auxdata))
7072
{
7173
npy_intp N = dimensions[0];
7274
char *in_ptr = data[0];
@@ -76,94 +78,50 @@ quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
7678

7779
QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
7880
QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
81+
QuadBackendType backend_in = descr_in->backend;
82+
QuadBackendType backend_out = descr_out->backend;
7983

8084
// inter-backend casting
81-
if (descr_in->backend != descr_out->backend) {
85+
if (backend_in != backend_out) {
8286
while (N--) {
83-
quad_value in_val, out_val;
84-
if (descr_in->backend == BACKEND_SLEEF) {
85-
memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad));
86-
out_val.longdouble_value = Sleef_cast_to_doubleq1(in_val.sleef_value);
87+
quad_value in_val = load_quad<Aligned>(in_ptr, backend_in);
88+
if (backend_in == BACKEND_SLEEF)
89+
{
90+
long double res = Sleef_cast_to_doubleq1(in_val.sleef_value);
91+
store<Aligned>(out_ptr, res);
8792
}
88-
else {
89-
memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double));
90-
out_val.sleef_value = Sleef_cast_from_doubleq1(in_val.longdouble_value);
93+
else
94+
{
95+
Sleef_quad res;
96+
long double ld = in_val.longdouble_value;
97+
if (std::isnan(ld)) {
98+
res = QUAD_PRECISION_NAN;
99+
}
100+
else if (std::isinf(ld)) {
101+
res = (ld > 0) ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF;
102+
}
103+
else
104+
{
105+
res = Sleef_cast_from_doubleq1(static_cast<double>(ld));
106+
}
107+
store<Aligned>(out_ptr, res);
91108
}
92-
memcpy(out_ptr, &out_val,
93-
(descr_out->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad)
94-
: sizeof(long double));
95109
in_ptr += in_stride;
96110
out_ptr += out_stride;
97111
}
98-
99112
return 0;
100113
}
101114

102-
size_t elem_size =
103-
(descr_in->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
104-
115+
// same backend: direct copy
105116
while (N--) {
106-
memcpy(out_ptr, in_ptr, elem_size);
117+
quad_value val = load_quad<Aligned>(in_ptr, backend_in);
118+
store_quad<Aligned>(out_ptr, val, backend_out);
107119
in_ptr += in_stride;
108120
out_ptr += out_stride;
109121
}
110122
return 0;
111123
}
112124

113-
static int
114-
quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
115-
npy_intp const dimensions[], npy_intp const strides[],
116-
void *NPY_UNUSED(auxdata))
117-
{
118-
npy_intp N = dimensions[0];
119-
char *in_ptr = data[0];
120-
char *out_ptr = data[1];
121-
npy_intp in_stride = strides[0];
122-
npy_intp out_stride = strides[1];
123-
124-
QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0];
125-
QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1];
126-
127-
// inter-backend casting
128-
if (descr_in->backend != descr_out->backend) {
129-
if (descr_in->backend == BACKEND_SLEEF) {
130-
while (N--) {
131-
Sleef_quad in_val = *(Sleef_quad *)in_ptr;
132-
*(long double *)out_ptr = Sleef_cast_to_doubleq1(in_val);
133-
in_ptr += in_stride;
134-
out_ptr += out_stride;
135-
}
136-
}
137-
else {
138-
while (N--) {
139-
long double in_val = *(long double *)in_ptr;
140-
*(Sleef_quad *)out_ptr = Sleef_cast_from_doubleq1(in_val);
141-
in_ptr += in_stride;
142-
out_ptr += out_stride;
143-
}
144-
}
145-
146-
return 0;
147-
}
148-
149-
if (descr_in->backend == BACKEND_SLEEF) {
150-
while (N--) {
151-
*(Sleef_quad *)out_ptr = *(Sleef_quad *)in_ptr;
152-
in_ptr += in_stride;
153-
out_ptr += out_stride;
154-
}
155-
}
156-
else {
157-
while (N--) {
158-
*(long double *)out_ptr = *(long double *)in_ptr;
159-
in_ptr += in_stride;
160-
out_ptr += out_stride;
161-
}
162-
}
163-
164-
return 0;
165-
}
166-
167125
static NPY_CASTING
168126
void_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2],
169127
PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
@@ -1447,8 +1405,8 @@ init_casts_internal(void)
14471405
PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr};
14481406
PyType_Slot *quad2quad_slots = new PyType_Slot[4]{
14491407
{NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
1450-
{NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop_aligned},
1451-
{NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop_unaligned},
1408+
{NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop<true>},
1409+
{NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop<false>},
14521410
{0, nullptr}};
14531411

14541412
PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{

quaddtype/numpy_quaddtype/src/utilities.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ load(const char *ptr)
4848
// Store a value to memory, handling alignment
4949
template <bool Aligned, typename T>
5050
static inline void
51-
store(char *ptr, T val)
51+
store(char *ptr, const T &val)
5252
{
5353
if constexpr (Aligned) {
5454
*(T *)ptr = val;

quaddtype/tests/test_quaddtype.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5368,3 +5368,35 @@ def test_hash_backends(self, backend):
53685368
"""Test hash works for both backends."""
53695369
quad_val = QuadPrecision(1.5, backend=backend)
53705370
assert hash(quad_val) == hash(1.5)
5371+
5372+
5373+
@pytest.mark.parametrize("src_backend,dst_backend", [
5374+
("sleef", "longdouble"),
5375+
("longdouble", "sleef"),
5376+
("sleef", "sleef"),
5377+
("longdouble", "longdouble"),
5378+
])
5379+
@pytest.mark.parametrize("value", [
5380+
"0.0", "-0.0", "1.0", "-1.0", "3.14159265358979323846",
5381+
"inf", "-inf", "nan", "1e100", "1e-100",
5382+
])
5383+
def test_quad_to_quad_backend_casting(src_backend, dst_backend, value):
5384+
"""Test casting between QuadPrecDType with different backends."""
5385+
5386+
src_arr = np.array([value], dtype=QuadPrecDType(backend=src_backend))
5387+
dst_arr = src_arr.astype(QuadPrecDType(backend=dst_backend))
5388+
res_arr = np.array([value], dtype=QuadPrecDType(backend=dst_backend))
5389+
5390+
expected_backend = 0 if dst_backend == 'sleef' else 1
5391+
assert dst_arr.dtype.backend == expected_backend
5392+
5393+
if np.isnan(src_arr[0]):
5394+
assert np.isnan(dst_arr[0])
5395+
elif np.isinf(src_arr[0]):
5396+
assert np.isinf(dst_arr[0])
5397+
elif src_backend != dst_backend:
5398+
np.testing.assert_allclose(dst_arr, res_arr, rtol=1e-15)
5399+
else:
5400+
np.testing.assert_array_equal(dst_arr, res_arr)
5401+
5402+

0 commit comments

Comments
 (0)