Skip to content

Commit 5d37060

Browse files
authored
Update stdsimd-verify for vendor types (#289)
This commit provides insurance that intrinsics are only introduced with known canonical types (`__m128i` and such) instead of also allowing `u8x16` for example.
1 parent 3849d63 commit 5d37060

File tree

5 files changed

+118
-137
lines changed

5 files changed

+118
-137
lines changed

coresimd/src/x86/i586/avx2.rs

+18-21
Original file line numberDiff line numberDiff line change
@@ -3162,8 +3162,6 @@ extern "C" {
31623162
mod tests {
31633163
use stdsimd_test::simd_test;
31643164

3165-
use v256::*;
3166-
use v128::*;
31673165
use x86::*;
31683166
use std;
31693167

@@ -3423,17 +3421,16 @@ mod tests {
34233421
unsafe fn test_mm256_and_si256() {
34243422
let a = _mm256_set1_epi8(5);
34253423
let b = _mm256_set1_epi8(3);
3426-
let got = _mm256_and_si256(__m256i::from(a), __m256i::from(b));
3427-
assert_eq!(got, __m256i::from(_mm256_set1_epi8(1)));
3424+
let got = _mm256_and_si256(a, b);
3425+
assert_eq!(got, _mm256_set1_epi8(1));
34283426
}
34293427

34303428
#[simd_test = "avx2"]
34313429
unsafe fn test_mm256_andnot_si256() {
34323430
let a = _mm256_set1_epi8(5);
34333431
let b = _mm256_set1_epi8(3);
3434-
let got =
3435-
_mm256_andnot_si256(__m256i::from(a), __m256i::from(b));
3436-
assert_eq!(got, __m256i::from(_mm256_set1_epi8(2)));
3432+
let got = _mm256_andnot_si256(a, b);
3433+
assert_eq!(got, _mm256_set1_epi8(2));
34373434
}
34383435

34393436
#[simd_test = "avx2"]
@@ -3774,9 +3771,9 @@ mod tests {
37743771

37753772
#[simd_test = "avx2"]
37763773
unsafe fn test_mm256_extracti128_si256() {
3777-
let a = __m256i::from(_mm256_setr_epi64x(1, 2, 3, 4));
3774+
let a = _mm256_setr_epi64x(1, 2, 3, 4);
37783775
let r = _mm256_extracti128_si256(a, 0b01);
3779-
let e = __m128i::from(_mm_setr_epi64x(3, 4));
3776+
let e = _mm_setr_epi64x(3, 4);
37803777
assert_eq!(r, e);
37813778
}
37823779

@@ -3850,11 +3847,11 @@ mod tests {
38503847

38513848
#[simd_test = "avx2"]
38523849
unsafe fn test_mm256_inserti128_si256() {
3853-
let a = __m256i::from(_mm256_setr_epi64x(1, 2, 3, 4));
3854-
let b = __m128i::from(_mm_setr_epi64x(7, 8));
3850+
let a = _mm256_setr_epi64x(1, 2, 3, 4);
3851+
let b = _mm_setr_epi64x(7, 8);
38553852
let r = _mm256_inserti128_si256(a, b, 0b01);
38563853
let e = _mm256_setr_epi64x(1, 2, 7, 8);
3857-
assert_eq!(r, __m256i::from(e));
3854+
assert_eq!(r, e);
38583855
}
38593856

38603857
#[simd_test = "avx2"]
@@ -4124,8 +4121,8 @@ mod tests {
41244121

41254122
#[simd_test = "avx2"]
41264123
unsafe fn test_mm256_or_si256() {
4127-
let a = __m256i::from(_mm256_set1_epi8(-1));
4128-
let b = __m256i::from(_mm256_set1_epi8(0));
4124+
let a = _mm256_set1_epi8(-1);
4125+
let b = _mm256_set1_epi8(0);
41294126
let r = _mm256_or_si256(a, b);
41304127
assert_eq!(r, a);
41314128
}
@@ -4301,8 +4298,8 @@ mod tests {
43014298
#[simd_test = "avx2"]
43024299
unsafe fn test_mm256_slli_si256() {
43034300
let a = _mm256_set1_epi64x(0xFFFFFFFF);
4304-
let r = _mm256_slli_si256(__m256i::from(a), 3);
4305-
assert_eq!(r, __m256i::from(_mm256_set1_epi64x(0xFFFFFFFF000000)));
4301+
let r = _mm256_slli_si256(a, 3);
4302+
assert_eq!(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
43064303
}
43074304

43084305
#[simd_test = "avx2"]
@@ -4400,15 +4397,15 @@ mod tests {
44004397
17, 18, 19, 20, 21, 22, 23, 24,
44014398
25, 26, 27, 28, 29, 30, 31, 32,
44024399
);
4403-
let r = _mm256_srli_si256(__m256i::from(a), 3);
4400+
let r = _mm256_srli_si256(a, 3);
44044401
#[cfg_attr(rustfmt, rustfmt_skip)]
44054402
let e = _mm256_setr_epi8(
44064403
4, 5, 6, 7, 8, 9, 10, 11,
44074404
12, 13, 14, 15, 16, 0, 0, 0,
44084405
20, 21, 22, 23, 24, 25, 26, 27,
44094406
28, 29, 30, 31, 32, 0, 0, 0,
44104407
);
4411-
assert_eq!(r, __m256i::from(e));
4408+
assert_eq!(r, e);
44124409
}
44134410

44144411
#[simd_test = "avx2"]
@@ -4561,10 +4558,10 @@ mod tests {
45614558

45624559
#[simd_test = "avx2"]
45634560
unsafe fn test_mm256_xor_si256() {
4564-
let a = __m256i::from(_mm256_set1_epi8(5));
4565-
let b = __m256i::from(_mm256_set1_epi8(3));
4561+
let a = _mm256_set1_epi8(5);
4562+
let b = _mm256_set1_epi8(3);
45664563
let r = _mm256_xor_si256(a, b);
4567-
assert_eq!(r, __m256i::from(_mm256_set1_epi8(6)));
4564+
assert_eq!(r, _mm256_set1_epi8(6));
45684565
}
45694566

45704567
#[simd_test = "avx2"]

coresimd/src/x86/i686/sse4a.rs

+26-26
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use core::mem;
44
use v128::*;
5+
use x86::*;
56

67
#[cfg(test)]
78
use stdsimd_test::assert_instr;
@@ -13,9 +14,9 @@ extern "C" {
1314
#[link_name = "llvm.x86.sse4a.insertq"]
1415
fn insertq(x: i64x2, y: i64x2) -> i64x2;
1516
#[link_name = "llvm.x86.sse4a.movnt.sd"]
16-
fn movntsd(x: *mut f64, y: f64x2);
17+
fn movntsd(x: *mut f64, y: __m128d);
1718
#[link_name = "llvm.x86.sse4a.movnt.ss"]
18-
fn movntss(x: *mut f32, y: f32x4);
19+
fn movntss(x: *mut f32, y: __m128);
1920
}
2021

2122
// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
@@ -35,8 +36,8 @@ extern "C" {
3536
#[inline(always)]
3637
#[target_feature(enable = "sse4a")]
3738
#[cfg_attr(test, assert_instr(extrq))]
38-
pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
39-
extrq(x, mem::transmute(y))
39+
pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
40+
mem::transmute(extrq(x.as_i64x2(), y.as_i8x16()))
4041
}
4142

4243
/// Inserts the `[length:0]` bits of `y` into `x` at `index`.
@@ -51,59 +52,58 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
5152
#[inline(always)]
5253
#[target_feature(enable = "sse4a")]
5354
#[cfg_attr(test, assert_instr(insertq))]
54-
pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
55-
insertq(x, y)
55+
pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
56+
mem::transmute(insertq(x.as_i64x2(), y.as_i64x2()))
5657
}
5758

5859
/// Non-temporal store of `a.0` into `p`.
5960
#[inline(always)]
6061
#[target_feature(enable = "sse4a")]
6162
#[cfg_attr(test, assert_instr(movntsd))]
62-
pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) {
63+
pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
6364
movntsd(p, a);
6465
}
6566

6667
/// Non-temporal store of `a.0` into `p`.
6768
#[inline(always)]
6869
#[target_feature(enable = "sse4a")]
6970
#[cfg_attr(test, assert_instr(movntss))]
70-
pub unsafe fn _mm_stream_ss(p: *mut f32, a: f32x4) {
71+
pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
7172
movntss(p, a);
7273
}
7374

7475
#[cfg(test)]
7576
mod tests {
7677
use stdsimd_test::simd_test;
77-
use x86::i686::sse4a;
78-
use v128::*;
78+
use x86::*;
7979

8080
#[simd_test = "sse4a"]
81-
unsafe fn _mm_extract_si64() {
81+
unsafe fn test_mm_extract_si64() {
8282
let b = 0b0110_0000_0000_i64;
8383
// ^^^^ bit range extracted
84-
let x = i64x2::new(b, 0);
84+
let x = _mm_setr_epi64x(b, 0);
8585
let v = 0b001000___00___000100_i64;
8686
// ^idx: 2^3 = 8 ^length = 2^2 = 4
87-
let y = i64x2::new(v, 0);
88-
let e = i64x2::new(0b0110_i64, 0);
89-
let r = sse4a::_mm_extract_si64(x, y);
87+
let y = _mm_setr_epi64x(v, 0);
88+
let e = _mm_setr_epi64x(0b0110_i64, 0);
89+
let r = _mm_extract_si64(x, y);
9090
assert_eq!(r, e);
9191
}
9292

9393
#[simd_test = "sse4a"]
94-
unsafe fn _mm_insert_si64() {
94+
unsafe fn test_mm_insert_si64() {
9595
let i = 0b0110_i64;
9696
// ^^^^ bit range inserted
9797
let z = 0b1010_1010_1010i64;
9898
// ^^^^ bit range replaced
9999
let e = 0b0110_1010_1010i64;
100100
// ^^^^ replaced 1010 with 0110
101-
let x = i64x2::new(z, 0);
102-
let expected = i64x2::new(e, 0);
101+
let x = _mm_setr_epi64x(z, 0);
102+
let expected = _mm_setr_epi64x(e, 0);
103103
let v = 0b001000___00___000100_i64;
104104
// ^idx: 2^3 = 8 ^length = 2^2 = 4
105-
let y = i64x2::new(i, v);
106-
let r = sse4a::_mm_insert_si64(x, y);
105+
let y = _mm_setr_epi64x(i, v);
106+
let r = _mm_insert_si64(x, y);
107107
assert_eq!(r, expected);
108108
}
109109

@@ -113,17 +113,17 @@ mod tests {
113113
}
114114

115115
#[simd_test = "sse4a"]
116-
unsafe fn _mm_stream_sd() {
116+
unsafe fn test_mm_stream_sd() {
117117
let mut mem = MemoryF64 {
118118
data: [1.0_f64, 2.0],
119119
};
120120
{
121121
let vals = &mut mem.data;
122122
let d = vals.as_mut_ptr();
123123

124-
let x = f64x2::new(3.0, 4.0);
124+
let x = _mm_setr_pd(3.0, 4.0);
125125

126-
sse4a::_mm_stream_sd(d, x);
126+
_mm_stream_sd(d, x);
127127
}
128128
assert_eq!(mem.data[0], 3.0);
129129
assert_eq!(mem.data[1], 2.0);
@@ -135,17 +135,17 @@ mod tests {
135135
}
136136

137137
#[simd_test = "sse4a"]
138-
unsafe fn _mm_stream_ss() {
138+
unsafe fn test_mm_stream_ss() {
139139
let mut mem = MemoryF32 {
140140
data: [1.0_f32, 2.0, 3.0, 4.0],
141141
};
142142
{
143143
let vals = &mut mem.data;
144144
let d = vals.as_mut_ptr();
145145

146-
let x = f32x4::new(5.0, 6.0, 7.0, 8.0);
146+
let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
147147

148-
sse4a::_mm_stream_ss(d, x);
148+
_mm_stream_ss(d, x);
149149
}
150150
assert_eq!(mem.data[0], 5.0);
151151
assert_eq!(mem.data[1], 2.0);

coresimd/src/x86/test.rs

+11
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,14 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
6363
union A { a: __m256, b: [f32; 8] };
6464
A { a }.b[idx]
6565
}
66+
67+
// These intrinsics doesn't exist on x86 b/c it requires a 64-bit registe,r which
68+
// doesn't exist on x86!
69+
#[cfg(target_arch = "x86")]
70+
#[target_feature(enable = "avx")]
71+
pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i {
72+
union A { a: __m128i, b: [i64; 2] };
73+
let mut a = A { a };
74+
a.b[idx as usize] = val;
75+
a.a
76+
}

stdsimd-verify/src/lib.rs

+33-35
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
2525

2626
let mut files = Vec::new();
2727
walk(&root, &mut files);
28+
assert!(files.len() > 0);
2829

2930
let mut functions = Vec::new();
3031
for file in files {
@@ -35,6 +36,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
3536
}
3637
}
3738
}
39+
assert!(functions.len() > 0);
3840

3941
functions.retain(|f| {
4042
match f.vis {
@@ -48,10 +50,11 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
4850
.iter()
4951
.filter_map(|a| a.interpret_meta())
5052
.any(|a| match a {
51-
syn::Meta::NameValue(i) => i.ident == "target_feature",
53+
syn::Meta::List(i) => i.ident == "target_feature",
5254
_ => false,
5355
})
5456
});
57+
assert!(functions.len() > 0);
5558

5659
let input = proc_macro2::TokenStream::from(input);
5760

@@ -97,48 +100,24 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
97100
fn to_type(t: &syn::Type) -> Tokens {
98101
match *t {
99102
syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
100-
"__m128" => my_quote! { &F32x4 },
101-
"__m128d" => my_quote! { &F64x2 },
102-
"__m128i" => my_quote! { &I8x16 },
103-
"__m256i" => my_quote! { &I8x32 },
104-
"__m64" => my_quote! { &I8x8 },
103+
"__m128" => my_quote! { &M128 },
104+
"__m128d" => my_quote! { &M128D },
105+
"__m128i" => my_quote! { &M128I },
106+
"__m256" => my_quote! { &M256 },
107+
"__m256d" => my_quote! { &M256D },
108+
"__m256i" => my_quote! { &M256I },
109+
"__m64" => my_quote! { &M64 },
105110
"bool" => my_quote! { &BOOL },
106111
"f32" => my_quote! { &F32 },
107-
"f32x4" => my_quote! { &F32x4 },
108-
"f32x8" => my_quote! { &F32x8 },
109112
"f64" => my_quote! { &F64 },
110-
"f64x2" => my_quote! { &F64x2 },
111-
"f64x4" => my_quote! { &F64x4 },
112113
"i16" => my_quote! { &I16 },
113-
"i16x16" => my_quote! { &I16x16 },
114-
"i16x4" => my_quote! { &I16x4 },
115-
"i16x8" => my_quote! { &I16x8 },
116114
"i32" => my_quote! { &I32 },
117-
"i32x2" => my_quote! { &I32x2 },
118-
"i32x4" => my_quote! { &I32x4 },
119-
"i32x8" => my_quote! { &I32x8 },
120115
"i64" => my_quote! { &I64 },
121-
"i64x2" => my_quote! { &I64x2 },
122-
"i64x4" => my_quote! { &I64x4 },
123116
"i8" => my_quote! { &I8 },
124-
"i8x16" => my_quote! { &I8x16 },
125-
"i8x32" => my_quote! { &I8x32 },
126-
"i8x8" => my_quote! { &I8x8 },
127-
"u16x4" => my_quote! { &U16x4 },
128-
"u16x8" => my_quote! { &U16x8 },
117+
"u16" => my_quote! { &U16 },
129118
"u32" => my_quote! { &U32 },
130-
"u32x2" => my_quote! { &U32x2 },
131-
"u32x4" => my_quote! { &U32x4 },
132-
"u32x8" => my_quote! { &U32x8 },
133119
"u64" => my_quote! { &U64 },
134-
"u64x2" => my_quote! { &U64x2 },
135-
"u64x4" => my_quote! { &U64x4 },
136120
"u8" => my_quote! { &U8 },
137-
"u16" => my_quote! { &U16 },
138-
"u8x16" => my_quote! { &U8x16 },
139-
"u8x32" => my_quote! { &U8x32 },
140-
"u16x16" => my_quote! { &U16x16 },
141-
"u8x8" => my_quote! { &U8x8 },
142121
s => panic!("unspported type: {}", s),
143122
},
144123
syn::Type::Ptr(syn::TypePtr { ref elem, .. })
@@ -233,15 +212,34 @@ fn find_target_feature(
233212
.iter()
234213
.filter_map(|a| a.interpret_meta())
235214
.filter_map(|a| match a {
236-
syn::Meta::NameValue(i) => {
215+
syn::Meta::List(i) => {
237216
if i.ident == "target_feature" {
238-
Some(i.lit)
217+
Some(i.nested)
239218
} else {
240219
None
241220
}
242221
}
243222
_ => None,
244223
})
224+
.flat_map(|list| list)
225+
.filter_map(|nested| {
226+
match nested {
227+
syn::NestedMeta::Meta(m) => Some(m),
228+
syn::NestedMeta::Literal(_) => None,
229+
}
230+
})
231+
.filter_map(|m| {
232+
match m {
233+
syn::Meta::NameValue(i) => {
234+
if i.ident == "enable" {
235+
Some(i.lit)
236+
} else {
237+
None
238+
}
239+
}
240+
_ => None,
241+
}
242+
})
245243
.next()
246244
.expect(&format!("failed to find target_feature for {}", name))
247245
}

0 commit comments

Comments
 (0)