Skip to content

Commit 78f36e0

Browse files
committed
Update stdsimd-verify for vendor types
This commit provides insurance that intrinsics are only introduced with known canonical types (`__m128i` and such) instead of also allowing `u8x16` for example.
1 parent 3849d63 commit 78f36e0

File tree

3 files changed

+89
-116
lines changed

3 files changed

+89
-116
lines changed

coresimd/src/x86/i686/sse4a.rs

+26-26
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use core::mem;
44
use v128::*;
5+
use x86::*;
56

67
#[cfg(test)]
78
use stdsimd_test::assert_instr;
@@ -13,9 +14,9 @@ extern "C" {
1314
#[link_name = "llvm.x86.sse4a.insertq"]
1415
fn insertq(x: i64x2, y: i64x2) -> i64x2;
1516
#[link_name = "llvm.x86.sse4a.movnt.sd"]
16-
fn movntsd(x: *mut f64, y: f64x2);
17+
fn movntsd(x: *mut f64, y: __m128d);
1718
#[link_name = "llvm.x86.sse4a.movnt.ss"]
18-
fn movntss(x: *mut f32, y: f32x4);
19+
fn movntss(x: *mut f32, y: __m128);
1920
}
2021

2122
// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
@@ -35,8 +36,8 @@ extern "C" {
3536
#[inline(always)]
3637
#[target_feature(enable = "sse4a")]
3738
#[cfg_attr(test, assert_instr(extrq))]
38-
pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
39-
extrq(x, mem::transmute(y))
39+
pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
40+
mem::transmute(extrq(x.as_i64x2(), y.as_i8x16()))
4041
}
4142

4243
/// Inserts the `[length:0]` bits of `y` into `x` at `index`.
@@ -51,59 +52,58 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
5152
#[inline(always)]
5253
#[target_feature(enable = "sse4a")]
5354
#[cfg_attr(test, assert_instr(insertq))]
54-
pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
55-
insertq(x, y)
55+
pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
56+
mem::transmute(insertq(x.as_i64x2(), y.as_i64x2()))
5657
}
5758

5859
/// Non-temporal store of `a.0` into `p`.
5960
#[inline(always)]
6061
#[target_feature(enable = "sse4a")]
6162
#[cfg_attr(test, assert_instr(movntsd))]
62-
pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) {
63+
pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
6364
movntsd(p, a);
6465
}
6566

6667
/// Non-temporal store of `a.0` into `p`.
6768
#[inline(always)]
6869
#[target_feature(enable = "sse4a")]
6970
#[cfg_attr(test, assert_instr(movntss))]
70-
pub unsafe fn _mm_stream_ss(p: *mut f32, a: f32x4) {
71+
pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
7172
movntss(p, a);
7273
}
7374

7475
#[cfg(test)]
7576
mod tests {
7677
use stdsimd_test::simd_test;
77-
use x86::i686::sse4a;
78-
use v128::*;
78+
use x86::*;
7979

8080
#[simd_test = "sse4a"]
81-
unsafe fn _mm_extract_si64() {
81+
unsafe fn test_mm_extract_si64() {
8282
let b = 0b0110_0000_0000_i64;
8383
// ^^^^ bit range extracted
84-
let x = i64x2::new(b, 0);
84+
let x = _mm_setr_epi64x(b, 0);
8585
let v = 0b001000___00___000100_i64;
8686
// ^idx: 2^3 = 8 ^length = 2^2 = 4
87-
let y = i64x2::new(v, 0);
88-
let e = i64x2::new(0b0110_i64, 0);
89-
let r = sse4a::_mm_extract_si64(x, y);
87+
let y = _mm_setr_epi64x(v, 0);
88+
let e = _mm_setr_epi64x(0b0110_i64, 0);
89+
let r = _mm_extract_si64(x, y);
9090
assert_eq!(r, e);
9191
}
9292

9393
#[simd_test = "sse4a"]
94-
unsafe fn _mm_insert_si64() {
94+
unsafe fn test_mm_insert_si64() {
9595
let i = 0b0110_i64;
9696
// ^^^^ bit range inserted
9797
let z = 0b1010_1010_1010i64;
9898
// ^^^^ bit range replaced
9999
let e = 0b0110_1010_1010i64;
100100
// ^^^^ replaced 1010 with 0110
101-
let x = i64x2::new(z, 0);
102-
let expected = i64x2::new(e, 0);
101+
let x = _mm_setr_epi64x(z, 0);
102+
let expected = _mm_setr_epi64x(e, 0);
103103
let v = 0b001000___00___000100_i64;
104104
// ^idx: 2^3 = 8 ^length = 2^2 = 4
105-
let y = i64x2::new(i, v);
106-
let r = sse4a::_mm_insert_si64(x, y);
105+
let y = _mm_setr_epi64x(i, v);
106+
let r = _mm_insert_si64(x, y);
107107
assert_eq!(r, expected);
108108
}
109109

@@ -113,17 +113,17 @@ mod tests {
113113
}
114114

115115
#[simd_test = "sse4a"]
116-
unsafe fn _mm_stream_sd() {
116+
unsafe fn test_mm_stream_sd() {
117117
let mut mem = MemoryF64 {
118118
data: [1.0_f64, 2.0],
119119
};
120120
{
121121
let vals = &mut mem.data;
122122
let d = vals.as_mut_ptr();
123123

124-
let x = f64x2::new(3.0, 4.0);
124+
let x = _mm_setr_pd(3.0, 4.0);
125125

126-
sse4a::_mm_stream_sd(d, x);
126+
_mm_stream_sd(d, x);
127127
}
128128
assert_eq!(mem.data[0], 3.0);
129129
assert_eq!(mem.data[1], 2.0);
@@ -135,17 +135,17 @@ mod tests {
135135
}
136136

137137
#[simd_test = "sse4a"]
138-
unsafe fn _mm_stream_ss() {
138+
unsafe fn test_mm_stream_ss() {
139139
let mut mem = MemoryF32 {
140140
data: [1.0_f32, 2.0, 3.0, 4.0],
141141
};
142142
{
143143
let vals = &mut mem.data;
144144
let d = vals.as_mut_ptr();
145145

146-
let x = f32x4::new(5.0, 6.0, 7.0, 8.0);
146+
let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
147147

148-
sse4a::_mm_stream_ss(d, x);
148+
_mm_stream_ss(d, x);
149149
}
150150
assert_eq!(mem.data[0], 5.0);
151151
assert_eq!(mem.data[1], 2.0);

stdsimd-verify/src/lib.rs

+33-35
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
2525

2626
let mut files = Vec::new();
2727
walk(&root, &mut files);
28+
assert!(files.len() > 0);
2829

2930
let mut functions = Vec::new();
3031
for file in files {
@@ -35,6 +36,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
3536
}
3637
}
3738
}
39+
assert!(functions.len() > 0);
3840

3941
functions.retain(|f| {
4042
match f.vis {
@@ -48,10 +50,11 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
4850
.iter()
4951
.filter_map(|a| a.interpret_meta())
5052
.any(|a| match a {
51-
syn::Meta::NameValue(i) => i.ident == "target_feature",
53+
syn::Meta::List(i) => i.ident == "target_feature",
5254
_ => false,
5355
})
5456
});
57+
assert!(functions.len() > 0);
5558

5659
let input = proc_macro2::TokenStream::from(input);
5760

@@ -97,48 +100,24 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
97100
fn to_type(t: &syn::Type) -> Tokens {
98101
match *t {
99102
syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
100-
"__m128" => my_quote! { &F32x4 },
101-
"__m128d" => my_quote! { &F64x2 },
102-
"__m128i" => my_quote! { &I8x16 },
103-
"__m256i" => my_quote! { &I8x32 },
104-
"__m64" => my_quote! { &I8x8 },
103+
"__m128" => my_quote! { &M128 },
104+
"__m128d" => my_quote! { &M128D },
105+
"__m128i" => my_quote! { &M128I },
106+
"__m256" => my_quote! { &M256 },
107+
"__m256d" => my_quote! { &M256D },
108+
"__m256i" => my_quote! { &M256I },
109+
"__m64" => my_quote! { &M64 },
105110
"bool" => my_quote! { &BOOL },
106111
"f32" => my_quote! { &F32 },
107-
"f32x4" => my_quote! { &F32x4 },
108-
"f32x8" => my_quote! { &F32x8 },
109112
"f64" => my_quote! { &F64 },
110-
"f64x2" => my_quote! { &F64x2 },
111-
"f64x4" => my_quote! { &F64x4 },
112113
"i16" => my_quote! { &I16 },
113-
"i16x16" => my_quote! { &I16x16 },
114-
"i16x4" => my_quote! { &I16x4 },
115-
"i16x8" => my_quote! { &I16x8 },
116114
"i32" => my_quote! { &I32 },
117-
"i32x2" => my_quote! { &I32x2 },
118-
"i32x4" => my_quote! { &I32x4 },
119-
"i32x8" => my_quote! { &I32x8 },
120115
"i64" => my_quote! { &I64 },
121-
"i64x2" => my_quote! { &I64x2 },
122-
"i64x4" => my_quote! { &I64x4 },
123116
"i8" => my_quote! { &I8 },
124-
"i8x16" => my_quote! { &I8x16 },
125-
"i8x32" => my_quote! { &I8x32 },
126-
"i8x8" => my_quote! { &I8x8 },
127-
"u16x4" => my_quote! { &U16x4 },
128-
"u16x8" => my_quote! { &U16x8 },
117+
"u16" => my_quote! { &U16 },
129118
"u32" => my_quote! { &U32 },
130-
"u32x2" => my_quote! { &U32x2 },
131-
"u32x4" => my_quote! { &U32x4 },
132-
"u32x8" => my_quote! { &U32x8 },
133119
"u64" => my_quote! { &U64 },
134-
"u64x2" => my_quote! { &U64x2 },
135-
"u64x4" => my_quote! { &U64x4 },
136120
"u8" => my_quote! { &U8 },
137-
"u16" => my_quote! { &U16 },
138-
"u8x16" => my_quote! { &U8x16 },
139-
"u8x32" => my_quote! { &U8x32 },
140-
"u16x16" => my_quote! { &U16x16 },
141-
"u8x8" => my_quote! { &U8x8 },
142121
s => panic!("unspported type: {}", s),
143122
},
144123
syn::Type::Ptr(syn::TypePtr { ref elem, .. })
@@ -233,15 +212,34 @@ fn find_target_feature(
233212
.iter()
234213
.filter_map(|a| a.interpret_meta())
235214
.filter_map(|a| match a {
236-
syn::Meta::NameValue(i) => {
215+
syn::Meta::List(i) => {
237216
if i.ident == "target_feature" {
238-
Some(i.lit)
217+
Some(i.nested)
239218
} else {
240219
None
241220
}
242221
}
243222
_ => None,
244223
})
224+
.flat_map(|list| list)
225+
.filter_map(|nested| {
226+
match nested {
227+
syn::NestedMeta::Meta(m) => Some(m),
228+
syn::NestedMeta::Literal(_) => None,
229+
}
230+
})
231+
.filter_map(|m| {
232+
match m {
233+
syn::Meta::NameValue(i) => {
234+
if i.ident == "enable" {
235+
Some(i.lit)
236+
} else {
237+
None
238+
}
239+
}
240+
_ => None,
241+
}
242+
})
245243
.next()
246244
.expect(&format!("failed to find target_feature for {}", name))
247245
}

stdsimd-verify/tests/x86-intel.rs

+30-55
Original file line numberDiff line numberDiff line change
@@ -22,51 +22,37 @@ struct Function {
2222

2323
static BOOL: Type = Type::Bool;
2424
static F32: Type = Type::PrimFloat(32);
25-
static F32x4: Type = Type::Float(32, 4);
26-
static F32x8: Type = Type::Float(32, 8);
2725
static F64: Type = Type::PrimFloat(64);
28-
static F64x2: Type = Type::Float(64, 2);
29-
static F64x4: Type = Type::Float(64, 4);
3026
static I16: Type = Type::PrimSigned(16);
31-
static I16x16: Type = Type::Signed(16, 16);
32-
static I16x4: Type = Type::Signed(16, 4);
33-
static I16x8: Type = Type::Signed(16, 8);
3427
static I32: Type = Type::PrimSigned(32);
35-
static I32x2: Type = Type::Signed(32, 2);
36-
static I32x4: Type = Type::Signed(32, 4);
37-
static I32x8: Type = Type::Signed(32, 8);
3828
static I64: Type = Type::PrimSigned(64);
39-
static I64x2: Type = Type::Signed(64, 2);
40-
static I64x4: Type = Type::Signed(64, 4);
4129
static I8: Type = Type::PrimSigned(8);
42-
static I8x16: Type = Type::Signed(8, 16);
43-
static I8x32: Type = Type::Signed(8, 32);
44-
static I8x8: Type = Type::Signed(8, 8);
4530
static U16: Type = Type::PrimUnsigned(16);
46-
static U16x16: Type = Type::Unsigned(16, 16);
47-
// static U16x4: Type = Type::Unsigned(16, 4);
48-
static U16x8: Type = Type::Unsigned(16, 8);
4931
static U32: Type = Type::PrimUnsigned(32);
50-
static U32x2: Type = Type::Unsigned(32, 2);
51-
static U32x4: Type = Type::Unsigned(32, 4);
52-
static U32x8: Type = Type::Unsigned(32, 8);
5332
static U64: Type = Type::PrimUnsigned(64);
54-
static U64x2: Type = Type::Unsigned(64, 2);
55-
static U64x4: Type = Type::Unsigned(64, 4);
5633
static U8: Type = Type::PrimUnsigned(8);
57-
static U8x16: Type = Type::Unsigned(8, 16);
58-
static U8x32: Type = Type::Unsigned(8, 32);
59-
// static U8x8: Type = Type::Unsigned(8, 8);
34+
35+
static M64: Type = Type::M64;
36+
static M128: Type = Type::M128;
37+
static M128I: Type = Type::M128I;
38+
static M128D: Type = Type::M128D;
39+
static M256: Type = Type::M256;
40+
static M256I: Type = Type::M256I;
41+
static M256D: Type = Type::M256D;
6042

6143
#[derive(Debug)]
6244
enum Type {
63-
Float(u8, u8),
6445
PrimFloat(u8),
6546
PrimSigned(u8),
6647
PrimUnsigned(u8),
6748
Ptr(&'static Type),
68-
Signed(u8, u8),
69-
Unsigned(u8, u8),
49+
M64,
50+
M128,
51+
M128D,
52+
M128I,
53+
M256,
54+
M256D,
55+
M256I,
7056
Bool,
7157
}
7258

@@ -271,33 +257,22 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
271257
(&Type::Ptr(&Type::PrimUnsigned(8)), "const void*") => {}
272258
(&Type::Ptr(&Type::PrimUnsigned(8)), "void*") => {}
273259

274-
(&Type::Signed(a, b), "__m128i")
275-
| (&Type::Unsigned(a, b), "__m128i")
276-
| (&Type::Ptr(&Type::Signed(a, b)), "__m128i*")
277-
| (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {}
278-
279-
(&Type::Signed(a, b), "__m256i")
280-
| (&Type::Unsigned(a, b), "__m256i")
281-
| (&Type::Ptr(&Type::Signed(a, b)), "__m256i*")
282-
| (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*")
283-
if (a as u32) * (b as u32) == 256 => {}
284-
285-
(&Type::Signed(a, b), "__m64")
286-
| (&Type::Unsigned(a, b), "__m64")
287-
| (&Type::Ptr(&Type::Signed(a, b)), "__m64*")
288-
| (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {}
289-
290-
(&Type::Float(32, 4), "__m128") => {}
291-
(&Type::Ptr(&Type::Float(32, 4)), "__m128*") => {}
292-
293-
(&Type::Float(64, 2), "__m128d") => {}
294-
(&Type::Ptr(&Type::Float(64, 2)), "__m128d*") => {}
260+
(&Type::M64, "__m64")
261+
| (&Type::Ptr(&Type::M64), "__m64*") => {}
295262

296-
(&Type::Float(32, 8), "__m256") => {}
297-
(&Type::Ptr(&Type::Float(32, 8)), "__m256*") => {}
263+
(&Type::M128I, "__m128i")
264+
| (&Type::Ptr(&Type::M128I), "__m128i*")
265+
| (&Type::M128D, "__m128d")
266+
| (&Type::Ptr(&Type::M128D), "__m128d*")
267+
| (&Type::M128, "__m128")
268+
| (&Type::Ptr(&Type::M128), "__m128*") => {}
298269

299-
(&Type::Float(64, 4), "__m256d") => {}
300-
(&Type::Ptr(&Type::Float(64, 4)), "__m256d*") => {}
270+
(&Type::M256I, "__m256i")
271+
| (&Type::Ptr(&Type::M256I), "__m256i*")
272+
| (&Type::M256D, "__m256d")
273+
| (&Type::Ptr(&Type::M256D), "__m256d*")
274+
| (&Type::M256, "__m256")
275+
| (&Type::Ptr(&Type::M256), "__m256*") => {}
301276

302277
// These two intrinsics return a 16-bit element but in Intel's
303278
// intrinsics they're listed as returning an `int`.
@@ -311,7 +286,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
311286
// This is a macro (?) in C which seems to mutate its arguments, but
312287
// that means that we're taking pointers to arguments in rust
313288
// as we're not exposing it as a macro.
314-
(&Type::Ptr(&Type::Float(32, 4)), "__m128")
289+
(&Type::Ptr(&Type::M128), "__m128")
315290
if intrinsic == "_MM_TRANSPOSE4_PS" => {}
316291

317292
// These intrinsics return an `int` in C but they're always either the

0 commit comments

Comments
 (0)