Skip to content

Commit 3fb2099

Browse files
GabrielMajeri authored and gnzlbg committed
Add an AVX1 + FMA specialization
Also enables FMA for the tiled implementation
1 parent 44b15a7 commit 3fb2099

File tree

2 files changed

+24
-4
lines changed

2 files changed

+24
-4
lines changed

examples/aobench/src/tiled.rs

+13-3
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,13 @@ cfg_if! {
8686
ao_impl(scene, nsubsamples, img);
8787
}
8888

89-
#[target_feature(enable = "avx2")]
89+
#[target_feature(enable = "avx,fma")]
90+
unsafe fn ao_avx_fma<S: Scene>(scene: &mut S, nsubsamples: usize,
91+
img: &mut ::Image) {
92+
ao_impl(scene, nsubsamples, img);
93+
}
94+
95+
#[target_feature(enable = "avx2,fma")]
9096
unsafe fn ao_avx2<S: Scene>(scene: &mut S, nsubsamples: usize,
9197
img: &mut ::Image) {
9298
ao_impl(scene, nsubsamples, img);
@@ -95,10 +101,14 @@ cfg_if! {
95101
pub fn ao<S: Scene>(scene: &mut S, nsubsamples: usize,
96102
img: &mut ::Image) {
97103
unsafe {
98-
if is_x86_feature_detected!("avx2") {
104+
if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
99105
ao_avx2(scene, nsubsamples, img);
100106
} else if is_x86_feature_detected!("avx") {
101-
ao_avx(scene, nsubsamples, img);
107+
if is_x86_feature_detected!("fma") {
108+
ao_avx_fma(scene, nsubsamples, img);
109+
} else {
110+
ao_avx(scene, nsubsamples, img);
111+
}
102112
} else if is_x86_feature_detected!("sse4.2") {
103113
ao_sse42(scene, nsubsamples, img);
104114
} else {

examples/aobench/src/vector.rs

+11-1
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,12 @@ cfg_if! {
7272
ao_impl(scene, nsubsamples, img);
7373
}
7474

75+
#[target_feature(enable = "avx,fma")]
76+
unsafe fn ao_avx_fma<S: Scene>(scene: &mut S, nsubsamples: usize,
77+
img: &mut ::Image) {
78+
ao_impl(scene, nsubsamples, img);
79+
}
80+
7581
#[target_feature(enable = "avx2,fma")]
7682
unsafe fn ao_avx2<S: Scene>(scene: &mut S, nsubsamples: usize,
7783
img: &mut ::Image) {
@@ -84,7 +90,11 @@ cfg_if! {
8490
if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
8591
ao_avx2(scene, nsubsamples, img);
8692
} else if is_x86_feature_detected!("avx") {
87-
ao_avx(scene, nsubsamples, img);
93+
if is_x86_feature_detected!("fma") {
94+
ao_avx_fma(scene, nsubsamples, img);
95+
} else {
96+
ao_avx(scene, nsubsamples, img);
97+
}
8898
} else if is_x86_feature_detected!("sse4.2") {
8999
ao_sse42(scene, nsubsamples, img);
90100
} else {

0 commit comments

Comments
 (0)