@@ -104,9 +104,9 @@ quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtype
104104
105105template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
106106int
107- quad_generic_unary_op_strided_loop (PyArrayMethod_Context *context, char *const data[],
108- npy_intp const dimensions[], npy_intp const strides[],
109- NpyAuxData *auxdata)
107+ quad_generic_unary_op_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
108+ npy_intp const dimensions[], npy_intp const strides[],
109+ NpyAuxData *auxdata)
110110{
111111 npy_intp N = dimensions[0 ];
112112 char *in_ptr = data[0 ];
@@ -135,6 +135,34 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d
135135 return 0 ;
136136}
137137
138+ template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
139+ int
140+ quad_generic_unary_op_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
141+ npy_intp const dimensions[], npy_intp const strides[],
142+ NpyAuxData *auxdata)
143+ {
144+ npy_intp N = dimensions[0 ];
145+ char *in_ptr = data[0 ];
146+ char *out_ptr = data[1 ];
147+ npy_intp in_stride = strides[0 ];
148+ npy_intp out_stride = strides[1 ];
149+
150+ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
151+ QuadBackendType backend = descr->backend ;
152+
153+ while (N--) {
154+ if (backend == BACKEND_SLEEF) {
155+ sleef_op ((Sleef_quad *)in_ptr, (Sleef_quad *)out_ptr);
156+ }
157+ else {
158+ longdouble_op ((long double *)in_ptr, (long double *)out_ptr);
159+ }
160+ in_ptr += in_stride;
161+ out_ptr += out_stride;
162+ }
163+ return 0 ;
164+ }
165+
138166template <unary_op_quad_def sleef_op, unary_op_longdouble_def longdouble_op>
139167int
140168create_quad_unary_ufunc (PyObject *numpy, const char *ufunc_name)
@@ -149,15 +177,17 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name)
149177 PyType_Slot slots[] = {
150178 {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors},
151179 {NPY_METH_strided_loop,
152- (void *)&quad_generic_unary_op_strided_loop<sleef_op, longdouble_op>},
180+ (void *)&quad_generic_unary_op_strided_loop_aligned<sleef_op, longdouble_op>},
181+ {NPY_METH_unaligned_strided_loop,
182+ (void *)&quad_generic_unary_op_strided_loop_unaligned<sleef_op, longdouble_op>},
153183 {0 , NULL }};
154184
155185 PyArrayMethod_Spec Spec = {
156186 .name = " quad_unary_op" ,
157187 .nin = 1 ,
158188 .nout = 1 ,
159189 .casting = NPY_NO_CASTING,
160- .flags = (NPY_ARRAYMETHOD_FLAGS) 0 ,
190+ .flags = NPY_METH_SUPPORTS_UNALIGNED ,
161191 .dtypes = dtypes,
162192 .slots = slots,
163193 };
@@ -245,7 +275,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
245275 PyArray_Descr *const given_descrs[],
246276 PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED (view_offset))
247277{
248-
249278 QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0 ];
250279 QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1 ];
251280 QuadBackendType target_backend;
@@ -255,7 +284,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
255284 if (descr_in1->backend != descr_in2->backend ) {
256285 target_backend = BACKEND_LONGDOUBLE;
257286 casting = NPY_SAFE_CASTING;
258- } else {
287+ }
288+ else {
259289 target_backend = descr_in1->backend ;
260290 }
261291
@@ -266,7 +296,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
266296 if (!loop_descrs[i]) {
267297 return (NPY_CASTING)-1 ;
268298 }
269- } else {
299+ }
300+ else {
270301 Py_INCREF (given_descrs[i]);
271302 loop_descrs[i] = given_descrs[i];
272303 }
@@ -278,14 +309,16 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
278309 if (!loop_descrs[2 ]) {
279310 return (NPY_CASTING)-1 ;
280311 }
281- } else {
312+ }
313+ else {
282314 QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2 ];
283315 if (descr_out->backend != target_backend) {
284316 loop_descrs[2 ] = (PyArray_Descr *)new_quaddtype_instance (target_backend);
285317 if (!loop_descrs[2 ]) {
286318 return (NPY_CASTING)-1 ;
287319 }
288- } else {
320+ }
321+ else {
289322 Py_INCREF (given_descrs[2 ]);
290323 loop_descrs[2 ] = given_descrs[2 ];
291324 }
@@ -295,9 +328,9 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp
295328
296329template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
297330int
298- quad_generic_binop_strided_loop (PyArrayMethod_Context *context, char *const data[],
299- npy_intp const dimensions[], npy_intp const strides[],
300- NpyAuxData *auxdata)
331+ quad_generic_binop_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
332+ npy_intp const dimensions[], npy_intp const strides[],
333+ NpyAuxData *auxdata)
301334{
302335 npy_intp N = dimensions[0 ];
303336 char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
@@ -329,6 +362,37 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data
329362 return 0 ;
330363}
331364
365+ template <binary_op_quad_def sleef_op, binary_op_longdouble_def longdouble_op>
366+ int
367+ quad_generic_binop_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
368+ npy_intp const dimensions[], npy_intp const strides[],
369+ NpyAuxData *auxdata)
370+ {
371+ npy_intp N = dimensions[0 ];
372+ char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
373+ char *out_ptr = data[2 ];
374+ npy_intp in1_stride = strides[0 ];
375+ npy_intp in2_stride = strides[1 ];
376+ npy_intp out_stride = strides[2 ];
377+
378+ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
379+ QuadBackendType backend = descr->backend ;
380+
381+ while (N--) {
382+ if (backend == BACKEND_SLEEF) {
383+ sleef_op ((Sleef_quad *)out_ptr, (Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr);
384+ }
385+ else {
386+ longdouble_op ((long double *)out_ptr, (long double *)in1_ptr, (long double *)in2_ptr);
387+ }
388+
389+ in1_ptr += in1_stride;
390+ in2_ptr += in2_stride;
391+ out_ptr += out_stride;
392+ }
393+ return 0 ;
394+ }
395+
332396static int
333397quad_ufunc_promoter (PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
334398 PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
@@ -344,31 +408,26 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
344408 for (int i = 0 ; i < 3 ; i++) {
345409 Py_INCREF (op_dtypes[1 ]);
346410 new_op_dtypes[i] = op_dtypes[1 ];
347-
348411 }
349412 return 0 ;
350413 }
351414
352415 // Check if any input or signature is QuadPrecision
353416 for (int i = 0 ; i < nin; i++) {
354-
355417 if (op_dtypes[i] == &QuadPrecDType) {
356418 has_quad = true ;
357-
358419 }
359420 }
360421
361422 if (has_quad) {
362423 common = &QuadPrecDType;
363-
364424 }
365425 else {
366426 for (int i = nin; i < nargs; i++) {
367427 if (signature[i] != NULL ) {
368428 if (common == NULL ) {
369429 Py_INCREF (signature[i]);
370430 common = signature[i];
371-
372431 }
373432 else if (common != signature[i]) {
374433 Py_CLEAR (common); // Not homogeneous, unset common
@@ -388,7 +447,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
388447
389448 return -1 ;
390449 }
391-
392450 }
393451
394452 // Set all new_op_dtypes to the common dtype
@@ -424,15 +482,17 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name)
424482 PyType_Slot slots[] = {
425483 {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors},
426484 {NPY_METH_strided_loop,
427- (void *)&quad_generic_binop_strided_loop<sleef_op, longdouble_op>},
485+ (void *)&quad_generic_binop_strided_loop_aligned<sleef_op, longdouble_op>},
486+ {NPY_METH_unaligned_strided_loop,
487+ (void *)&quad_generic_binop_strided_loop_unaligned<sleef_op, longdouble_op>},
428488 {0 , NULL }};
429489
430490 PyArrayMethod_Spec Spec = {
431491 .name = " quad_binop" ,
432492 .nin = 2 ,
433493 .nout = 1 ,
434494 .casting = NPY_NO_CASTING,
435- .flags = NPY_METH_IS_REORDERABLE,
495+ .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE) ,
436496 .dtypes = dtypes,
437497 .slots = slots,
438498 };
@@ -500,9 +560,9 @@ init_quad_binary_ops(PyObject *numpy)
500560
501561template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
502562int
503- quad_generic_comp_strided_loop (PyArrayMethod_Context *context, char *const data[],
504- npy_intp const dimensions[], npy_intp const strides[],
505- NpyAuxData *auxdata)
563+ quad_generic_comp_strided_loop_aligned (PyArrayMethod_Context *context, char *const data[],
564+ npy_intp const dimensions[], npy_intp const strides[],
565+ NpyAuxData *auxdata)
506566{
507567 npy_intp N = dimensions[0 ];
508568 char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
@@ -513,7 +573,6 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
513573
514574 QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
515575 QuadBackendType backend = descr->backend ;
516- size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
517576
518577 while (N--) {
519578 if (backend == BACKEND_SLEEF) {
@@ -532,6 +591,42 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
532591 return 0 ;
533592}
534593
594+ template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
595+ int
596+ quad_generic_comp_strided_loop_unaligned (PyArrayMethod_Context *context, char *const data[],
597+ npy_intp const dimensions[], npy_intp const strides[],
598+ NpyAuxData *auxdata)
599+ {
600+ npy_intp N = dimensions[0 ];
601+ char *in1_ptr = data[0 ], *in2_ptr = data[1 ];
602+ char *out_ptr = data[2 ];
603+ npy_intp in1_stride = strides[0 ];
604+ npy_intp in2_stride = strides[1 ];
605+ npy_intp out_stride = strides[2 ];
606+
607+ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors [0 ];
608+ QuadBackendType backend = descr->backend ;
609+ size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof (Sleef_quad) : sizeof (long double );
610+
611+ quad_value in1, in2;
612+ while (N--) {
613+ memcpy (&in1, in1_ptr, elem_size);
614+ memcpy (&in2, in2_ptr, elem_size);
615+
616+ if (backend == BACKEND_SLEEF) {
617+ *((npy_bool *)out_ptr) = sleef_comp (&in1.sleef_value , &in2.sleef_value );
618+ }
619+ else {
620+ *((npy_bool *)out_ptr) = ld_comp (&in1.longdouble_value , &in2.longdouble_value );
621+ }
622+
623+ in1_ptr += in1_stride;
624+ in2_ptr += in2_stride;
625+ out_ptr += out_stride;
626+ }
627+ return 0 ;
628+ }
629+
535630NPY_NO_EXPORT int
536631comparison_ufunc_promoter (PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[],
537632 PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[])
@@ -559,16 +654,18 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
559654
560655 PyArray_DTypeMeta *dtypes[3 ] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType};
561656
562- PyType_Slot slots[] = {
563- {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
564- {0 , NULL }};
657+ PyType_Slot slots[] = {{NPY_METH_strided_loop,
658+ (void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
659+ {NPY_METH_unaligned_strided_loop,
660+ (void *)&quad_generic_comp_strided_loop_unaligned<sleef_comp, ld_comp>},
661+ {0 , NULL }};
565662
566663 PyArrayMethod_Spec Spec = {
567664 .name = " quad_comp" ,
568665 .nin = 2 ,
569666 .nout = 1 ,
570667 .casting = NPY_NO_CASTING,
571- .flags = (NPY_ARRAYMETHOD_FLAGS) 0 ,
668+ .flags = NPY_METH_SUPPORTS_UNALIGNED ,
572669 .dtypes = dtypes,
573670 .slots = slots,
574671 };
0 commit comments