Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 51718a1

Browse files
committed
Add assembly version of simple operations on aarch64
For aarch64 and arm64ec with Neon, add assembly versions of the following:

* `ceil`
* `ceilf`
* `fabs`
* `fabsf`
* `floor`
* `floorf`
* `fma`
* `fmaf`
* `round`
* `roundf`
* `sqrt`
* `sqrtf`
* `trunc`
* `truncf`

If the `fp16` target feature is available, which implies `neon`, also include the following:

* `ceilf16`
* `fabsf16`
* `floorf16`
* `rintf16`
* `sqrtf16`
* `truncf16`

Additionally, replace the `core::arch` versions of the following with handwritten assembly (which avoids issues with `aarch64be`):

* `rint`
* `rintf`

Instructions for `fmax` and `fmin` are also available, but they seem to provide different results based on whether NaN inputs are signaling or quiet. Our current implementation does not make this distinction, so omit these for now.
1 parent bc6a615 commit 51718a1

25 files changed

+391
-36
lines changed

etc/function-definitions.json

+20
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
"ceil": {
108108
"sources": [
109109
"src/libm_helper.rs",
110+
"src/math/arch/aarch64.rs",
110111
"src/math/arch/i586.rs",
111112
"src/math/arch/wasm32.rs",
112113
"src/math/ceil.rs",
@@ -116,6 +117,7 @@
116117
},
117118
"ceilf": {
118119
"sources": [
120+
"src/math/arch/aarch64.rs",
119121
"src/math/arch/wasm32.rs",
120122
"src/math/ceilf.rs",
121123
"src/math/generic/ceil.rs"
@@ -131,6 +133,7 @@
131133
},
132134
"ceilf16": {
133135
"sources": [
136+
"src/math/arch/aarch64.rs",
134137
"src/math/ceilf16.rs",
135138
"src/math/generic/ceil.rs"
136139
],
@@ -274,6 +277,7 @@
274277
"fabs": {
275278
"sources": [
276279
"src/libm_helper.rs",
280+
"src/math/arch/aarch64.rs",
277281
"src/math/arch/wasm32.rs",
278282
"src/math/fabs.rs",
279283
"src/math/generic/fabs.rs"
@@ -282,6 +286,7 @@
282286
},
283287
"fabsf": {
284288
"sources": [
289+
"src/math/arch/aarch64.rs",
285290
"src/math/arch/wasm32.rs",
286291
"src/math/fabsf.rs",
287292
"src/math/generic/fabs.rs"
@@ -297,6 +302,7 @@
297302
},
298303
"fabsf16": {
299304
"sources": [
305+
"src/math/arch/aarch64.rs",
300306
"src/math/fabsf16.rs",
301307
"src/math/generic/fabs.rs"
302308
],
@@ -334,6 +340,7 @@
334340
"floor": {
335341
"sources": [
336342
"src/libm_helper.rs",
343+
"src/math/arch/aarch64.rs",
337344
"src/math/arch/i586.rs",
338345
"src/math/arch/wasm32.rs",
339346
"src/math/floor.rs",
@@ -343,6 +350,7 @@
343350
},
344351
"floorf": {
345352
"sources": [
353+
"src/math/arch/aarch64.rs",
346354
"src/math/arch/wasm32.rs",
347355
"src/math/floorf.rs",
348356
"src/math/generic/floor.rs"
@@ -358,6 +366,7 @@
358366
},
359367
"floorf16": {
360368
"sources": [
369+
"src/math/arch/aarch64.rs",
361370
"src/math/floorf16.rs",
362371
"src/math/generic/floor.rs"
363372
],
@@ -366,12 +375,14 @@
366375
"fma": {
367376
"sources": [
368377
"src/libm_helper.rs",
378+
"src/math/arch/aarch64.rs",
369379
"src/math/fma.rs"
370380
],
371381
"type": "f64"
372382
},
373383
"fmaf": {
374384
"sources": [
385+
"src/math/arch/aarch64.rs",
375386
"src/math/fmaf.rs"
376387
],
377388
"type": "f32"
@@ -677,6 +688,7 @@
677688
},
678689
"rintf16": {
679690
"sources": [
691+
"src/math/arch/aarch64.rs",
680692
"src/math/generic/rint.rs",
681693
"src/math/rintf16.rs"
682694
],
@@ -685,12 +697,14 @@
685697
"round": {
686698
"sources": [
687699
"src/libm_helper.rs",
700+
"src/math/arch/aarch64.rs",
688701
"src/math/round.rs"
689702
],
690703
"type": "f64"
691704
},
692705
"roundf": {
693706
"sources": [
707+
"src/math/arch/aarch64.rs",
694708
"src/math/roundf.rs"
695709
],
696710
"type": "f32"
@@ -750,6 +764,7 @@
750764
"sqrt": {
751765
"sources": [
752766
"src/libm_helper.rs",
767+
"src/math/arch/aarch64.rs",
753768
"src/math/arch/i686.rs",
754769
"src/math/arch/wasm32.rs",
755770
"src/math/generic/sqrt.rs",
@@ -759,6 +774,7 @@
759774
},
760775
"sqrtf": {
761776
"sources": [
777+
"src/math/arch/aarch64.rs",
762778
"src/math/arch/i686.rs",
763779
"src/math/arch/wasm32.rs",
764780
"src/math/generic/sqrt.rs",
@@ -775,6 +791,7 @@
775791
},
776792
"sqrtf16": {
777793
"sources": [
794+
"src/math/arch/aarch64.rs",
778795
"src/math/generic/sqrt.rs",
779796
"src/math/sqrtf16.rs"
780797
],
@@ -822,6 +839,7 @@
822839
"trunc": {
823840
"sources": [
824841
"src/libm_helper.rs",
842+
"src/math/arch/aarch64.rs",
825843
"src/math/arch/wasm32.rs",
826844
"src/math/generic/trunc.rs",
827845
"src/math/trunc.rs"
@@ -830,6 +848,7 @@
830848
},
831849
"truncf": {
832850
"sources": [
851+
"src/math/arch/aarch64.rs",
833852
"src/math/arch/wasm32.rs",
834853
"src/math/generic/trunc.rs",
835854
"src/math/truncf.rs"
@@ -845,6 +864,7 @@
845864
},
846865
"truncf16": {
847866
"sources": [
867+
"src/math/arch/aarch64.rs",
848868
"src/math/generic/trunc.rs",
849869
"src/math/truncf16.rs"
850870
],

0 commit comments

Comments
 (0)