@@ -26,6 +26,7 @@ extern "C" {
2626#include " lock.h"
2727#include " dragon4.h"
2828#include " ops.hpp"
29+ #include " constants.hpp"
2930
3031#define NUM_CASTS 40 // 18 to_casts + 18 from_casts + 1 quad_to_quad + 1 void_to_quad
3132#define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer
@@ -53,7 +54,7 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
5354 // Different backends require actual conversion, no view possible
5455 *view_offset = NPY_MIN_INTP;
5556 if (given_descrs[0 ]->backend == BACKEND_SLEEF) {
56- return NPY_UNSAFE_CASTING ; // SLEEF -> long double may lose precision
57+ return NPY_SAME_KIND_CASTING ; // SLEEF -> long double may lose precision
5758 }
5859 // long double -> SLEEF preserves value exactly
5960 return static_cast <NPY_CASTING>(NPY_SAFE_CASTING | NPY_SAME_VALUE_CASTING_FLAG);
@@ -63,10 +64,11 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
6364 return NPY_NO_CASTING;
6465}
6566
67+ template <bool Aligned>
6668static int
67- quad_to_quad_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
68- npy_intp const dimensions[], npy_intp const strides[],
69- void *NPY_UNUSED (auxdata))
69+ quad_to_quad_strided_loop (PyArrayMethod_Context *context, char *const data[],
70+ npy_intp const dimensions[], npy_intp const strides[],
71+ void *NPY_UNUSED (auxdata))
7072{
7173 npy_intp N = dimensions[0 ];
7274 char *in_ptr = data[0 ];
@@ -76,94 +78,50 @@ quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const
7678
7779 QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors [0 ];
7880 QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors [1 ];
81+ QuadBackendType backend_in = descr_in->backend ;
82+ QuadBackendType backend_out = descr_out->backend ;
7983
8084 // inter-backend casting
81- if (descr_in-> backend != descr_out-> backend ) {
85+ if (backend_in != backend_out ) {
8286 while (N--) {
83- quad_value in_val, out_val;
84- if (descr_in->backend == BACKEND_SLEEF) {
85- memcpy (&in_val.sleef_value , in_ptr, sizeof (Sleef_quad));
86- out_val.longdouble_value = Sleef_cast_to_doubleq1 (in_val.sleef_value );
87+ quad_value in_val = load_quad<Aligned>(in_ptr, backend_in);
88+ if (backend_in == BACKEND_SLEEF)
89+ {
90+ long double res = Sleef_cast_to_doubleq1 (in_val.sleef_value );
91+ store<Aligned>(out_ptr, res);
8792 }
88- else {
89- memcpy (&in_val.longdouble_value , in_ptr, sizeof (long double ));
90- out_val.sleef_value = Sleef_cast_from_doubleq1 (in_val.longdouble_value );
93+ else
94+ {
95+ Sleef_quad res;
96+ long double ld = in_val.longdouble_value ;
97+ if (std::isnan (ld)) {
98+ res = QUAD_PRECISION_NAN;
99+ }
100+ else if (std::isinf (ld)) {
101+ res = (ld > 0 ) ? QUAD_PRECISION_INF : QUAD_PRECISION_NINF;
102+ }
103+ else
104+ {
105+ res = Sleef_cast_from_doubleq1 (static_cast <double >(ld));
106+ }
107+ store<Aligned>(out_ptr, res);
91108 }
92- memcpy (out_ptr, &out_val,
93- (descr_out->backend == BACKEND_SLEEF) ? sizeof (Sleef_quad)
94- : sizeof (long double ));
95109 in_ptr += in_stride;
96110 out_ptr += out_stride;
97111 }
98-
99112 return 0 ;
100113 }
101114
102- size_t elem_size =
103- (descr_in->backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
104-
115+ // same backend: direct copy
105116 while (N--) {
106- memcpy (out_ptr, in_ptr, elem_size);
117+ quad_value val = load_quad<Aligned>(in_ptr, backend_in);
118+ store_quad<Aligned>(out_ptr, val, backend_out);
107119 in_ptr += in_stride;
108120 out_ptr += out_stride;
109121 }
110122 return 0 ;
111123}
112124
113- static int
114- quad_to_quad_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
115- npy_intp const dimensions[], npy_intp const strides[],
116- void *NPY_UNUSED (auxdata))
117- {
118- npy_intp N = dimensions[0 ];
119- char *in_ptr = data[0 ];
120- char *out_ptr = data[1 ];
121- npy_intp in_stride = strides[0 ];
122- npy_intp out_stride = strides[1 ];
123-
124- QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors [0 ];
125- QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors [1 ];
126-
127- // inter-backend casting
128- if (descr_in->backend != descr_out->backend ) {
129- if (descr_in->backend == BACKEND_SLEEF) {
130- while (N--) {
131- Sleef_quad in_val = *(Sleef_quad *)in_ptr;
132- *(long double *)out_ptr = Sleef_cast_to_doubleq1 (in_val);
133- in_ptr += in_stride;
134- out_ptr += out_stride;
135- }
136- }
137- else {
138- while (N--) {
139- long double in_val = *(long double *)in_ptr;
140- *(Sleef_quad *)out_ptr = Sleef_cast_from_doubleq1 (in_val);
141- in_ptr += in_stride;
142- out_ptr += out_stride;
143- }
144- }
145-
146- return 0 ;
147- }
148-
149- if (descr_in->backend == BACKEND_SLEEF) {
150- while (N--) {
151- *(Sleef_quad *)out_ptr = *(Sleef_quad *)in_ptr;
152- in_ptr += in_stride;
153- out_ptr += out_stride;
154- }
155- }
156- else {
157- while (N--) {
158- *(long double *)out_ptr = *(long double *)in_ptr;
159- in_ptr += in_stride;
160- out_ptr += out_stride;
161- }
162- }
163-
164- return 0 ;
165- }
166-
167125static NPY_CASTING
168126void_to_quad_resolve_descriptors (PyObject *NPY_UNUSED (self), PyArray_DTypeMeta *dtypes[2],
169127 PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2],
@@ -1447,8 +1405,8 @@ init_casts_internal(void)
14471405 PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2 ]{nullptr , nullptr };
14481406 PyType_Slot *quad2quad_slots = new PyType_Slot[4 ]{
14491407 {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
1450- {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop_aligned },
1451- {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop_unaligned },
1408+ {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop< true > },
1409+ {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop< false > },
14521410 {0 , nullptr }};
14531411
14541412 PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{
0 commit comments