Update stdsimd-verify for vendor types

alexcrichton · alexcrichton · commit 78f36e000287 · 2018-01-19T08:33:17.000-08:00
This commit ensures that intrinsics are only introduced with the known
canonical vendor types (`__m128i` and such), instead of also allowing types
such as `u8x16`.
diff --git a/coresimd/src/x86/i686/sse4a.rs b/coresimd/src/x86/i686/sse4a.rs
@@ -2,6 +2,7 @@
 
 use core::mem;
 use v128::*;
+use x86::*;
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -13,9 +14,9 @@ extern "C" {
     #[link_name = "llvm.x86.sse4a.insertq"]
     fn insertq(x: i64x2, y: i64x2) -> i64x2;
     #[link_name = "llvm.x86.sse4a.movnt.sd"]
-    fn movntsd(x: *mut f64, y: f64x2);
+    fn movntsd(x: *mut f64, y: __m128d);
     #[link_name = "llvm.x86.sse4a.movnt.ss"]
-    fn movntss(x: *mut f32, y: f32x4);
+    fn movntss(x: *mut f32, y: __m128);
 }
 
 // FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
@@ -35,8 +36,8 @@ extern "C" {
 #[inline(always)]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(extrq))]
-pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
-    extrq(x, mem::transmute(y))
+pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
+    mem::transmute(extrq(x.as_i64x2(), y.as_i8x16()))
 }
 
 /// Inserts the `[length:0]` bits of `y` into `x` at `index`.
@@ -51,59 +52,58 @@ pub unsafe fn _mm_extract_si64(x: i64x2, y: i64x2) -> i64x2 {
 #[inline(always)]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(insertq))]
-pub unsafe fn _mm_insert_si64(x: i64x2, y: i64x2) -> i64x2 {
-    insertq(x, y)
+pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
+    mem::transmute(insertq(x.as_i64x2(), y.as_i64x2()))
 }
 
 /// Non-temporal store of `a.0` into `p`.
 #[inline(always)]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntsd))]
-pub unsafe fn _mm_stream_sd(p: *mut f64, a: f64x2) {
+pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
     movntsd(p, a);
 }
 
 /// Non-temporal store of `a.0` into `p`.
 #[inline(always)]
 #[target_feature(enable = "sse4a")]
 #[cfg_attr(test, assert_instr(movntss))]
-pub unsafe fn _mm_stream_ss(p: *mut f32, a: f32x4) {
+pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
     movntss(p, a);
 }
 
 #[cfg(test)]
 mod tests {
     use stdsimd_test::simd_test;
-    use x86::i686::sse4a;
-    use v128::*;
+    use x86::*;
 
     #[simd_test = "sse4a"]
-    unsafe fn _mm_extract_si64() {
+    unsafe fn test_mm_extract_si64() {
         let b = 0b0110_0000_0000_i64;
         //        ^^^^ bit range extracted
-        let x = i64x2::new(b, 0);
+        let x = _mm_setr_epi64x(b, 0);
         let v = 0b001000___00___000100_i64;
         //        ^idx: 2^3 = 8 ^length = 2^2 = 4
-        let y = i64x2::new(v, 0);
-        let e = i64x2::new(0b0110_i64, 0);
-        let r = sse4a::_mm_extract_si64(x, y);
+        let y = _mm_setr_epi64x(v, 0);
+        let e = _mm_setr_epi64x(0b0110_i64, 0);
+        let r = _mm_extract_si64(x, y);
         assert_eq!(r, e);
     }
 
     #[simd_test = "sse4a"]
-    unsafe fn _mm_insert_si64() {
+    unsafe fn test_mm_insert_si64() {
         let i = 0b0110_i64;
         //        ^^^^ bit range inserted
         let z = 0b1010_1010_1010i64;
         //        ^^^^ bit range replaced
         let e = 0b0110_1010_1010i64;
         //        ^^^^ replaced 1010 with 0110
-        let x = i64x2::new(z, 0);
-        let expected = i64x2::new(e, 0);
+        let x = _mm_setr_epi64x(z, 0);
+        let expected = _mm_setr_epi64x(e, 0);
         let v = 0b001000___00___000100_i64;
         //        ^idx: 2^3 = 8 ^length = 2^2 = 4
-        let y = i64x2::new(i, v);
-        let r = sse4a::_mm_insert_si64(x, y);
+        let y = _mm_setr_epi64x(i, v);
+        let r = _mm_insert_si64(x, y);
         assert_eq!(r, expected);
     }
 
@@ -113,17 +113,17 @@ mod tests {
     }
 
     #[simd_test = "sse4a"]
-    unsafe fn _mm_stream_sd() {
+    unsafe fn test_mm_stream_sd() {
         let mut mem = MemoryF64 {
             data: [1.0_f64, 2.0],
         };
         {
             let vals = &mut mem.data;
             let d = vals.as_mut_ptr();
 
-            let x = f64x2::new(3.0, 4.0);
+            let x = _mm_setr_pd(3.0, 4.0);
 
-            sse4a::_mm_stream_sd(d, x);
+            _mm_stream_sd(d, x);
         }
         assert_eq!(mem.data[0], 3.0);
         assert_eq!(mem.data[1], 2.0);
@@ -135,17 +135,17 @@ mod tests {
     }
 
     #[simd_test = "sse4a"]
-    unsafe fn _mm_stream_ss() {
+    unsafe fn test_mm_stream_ss() {
         let mut mem = MemoryF32 {
             data: [1.0_f32, 2.0, 3.0, 4.0],
         };
         {
             let vals = &mut mem.data;
             let d = vals.as_mut_ptr();
 
-            let x = f32x4::new(5.0, 6.0, 7.0, 8.0);
+            let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
 
-            sse4a::_mm_stream_ss(d, x);
+            _mm_stream_ss(d, x);
         }
         assert_eq!(mem.data[0], 5.0);
         assert_eq!(mem.data[1], 2.0);
diff --git a/stdsimd-verify/src/lib.rs b/stdsimd-verify/src/lib.rs
@@ -25,6 +25,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
 
     let mut files = Vec::new();
     walk(&root, &mut files);
+    assert!(files.len() > 0);
 
     let mut functions = Vec::new();
     for file in files {
@@ -35,6 +36,7 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
             }
         }
     }
+    assert!(functions.len() > 0);
 
     functions.retain(|f| {
         match f.vis {
@@ -48,10 +50,11 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
             .iter()
             .filter_map(|a| a.interpret_meta())
             .any(|a| match a {
-                syn::Meta::NameValue(i) => i.ident == "target_feature",
+                syn::Meta::List(i) => i.ident == "target_feature",
                 _ => false,
             })
     });
+    assert!(functions.len() > 0);
 
     let input = proc_macro2::TokenStream::from(input);
 
@@ -97,48 +100,24 @@ pub fn x86_functions(input: TokenStream) -> TokenStream {
 fn to_type(t: &syn::Type) -> Tokens {
     match *t {
         syn::Type::Path(ref p) => match extract_path_ident(&p.path).as_ref() {
-            "__m128" => my_quote! { &F32x4 },
-            "__m128d" => my_quote! { &F64x2 },
-            "__m128i" => my_quote! { &I8x16 },
-            "__m256i" => my_quote! { &I8x32 },
-            "__m64" => my_quote! { &I8x8 },
+            "__m128" => my_quote! { &M128 },
+            "__m128d" => my_quote! { &M128D },
+            "__m128i" => my_quote! { &M128I },
+            "__m256" => my_quote! { &M256 },
+            "__m256d" => my_quote! { &M256D },
+            "__m256i" => my_quote! { &M256I },
+            "__m64" => my_quote! { &M64 },
             "bool" => my_quote! { &BOOL },
             "f32" => my_quote! { &F32 },
-            "f32x4" => my_quote! { &F32x4 },
-            "f32x8" => my_quote! { &F32x8 },
             "f64" => my_quote! { &F64 },
-            "f64x2" => my_quote! { &F64x2 },
-            "f64x4" => my_quote! { &F64x4 },
             "i16" => my_quote! { &I16 },
-            "i16x16" => my_quote! { &I16x16 },
-            "i16x4" => my_quote! { &I16x4 },
-            "i16x8" => my_quote! { &I16x8 },
             "i32" => my_quote! { &I32 },
-            "i32x2" => my_quote! { &I32x2 },
-            "i32x4" => my_quote! { &I32x4 },
-            "i32x8" => my_quote! { &I32x8 },
             "i64" => my_quote! { &I64 },
-            "i64x2" => my_quote! { &I64x2 },
-            "i64x4" => my_quote! { &I64x4 },
             "i8" => my_quote! { &I8 },
-            "i8x16" => my_quote! { &I8x16 },
-            "i8x32" => my_quote! { &I8x32 },
-            "i8x8" => my_quote! { &I8x8 },
-            "u16x4" => my_quote! { &U16x4 },
-            "u16x8" => my_quote! { &U16x8 },
+            "u16" => my_quote! { &U16 },
             "u32" => my_quote! { &U32 },
-            "u32x2" => my_quote! { &U32x2 },
-            "u32x4" => my_quote! { &U32x4 },
-            "u32x8" => my_quote! { &U32x8 },
             "u64" => my_quote! { &U64 },
-            "u64x2" => my_quote! { &U64x2 },
-            "u64x4" => my_quote! { &U64x4 },
             "u8" => my_quote! { &U8 },
-            "u16" => my_quote! { &U16 },
-            "u8x16" => my_quote! { &U8x16 },
-            "u8x32" => my_quote! { &U8x32 },
-            "u16x16" => my_quote! { &U16x16 },
-            "u8x8" => my_quote! { &U8x8 },
             s => panic!("unspported type: {}", s),
         },
         syn::Type::Ptr(syn::TypePtr { ref elem, .. })
@@ -233,15 +212,34 @@ fn find_target_feature(
         .iter()
         .filter_map(|a| a.interpret_meta())
         .filter_map(|a| match a {
-            syn::Meta::NameValue(i) => {
+            syn::Meta::List(i) => {
                 if i.ident == "target_feature" {
-                    Some(i.lit)
+                    Some(i.nested)
                 } else {
                     None
                 }
             }
             _ => None,
         })
+        .flat_map(|list| list)
+        .filter_map(|nested| {
+            match nested {
+                syn::NestedMeta::Meta(m) => Some(m),
+                syn::NestedMeta::Literal(_) => None,
+            }
+        })
+        .filter_map(|m| {
+            match m {
+                syn::Meta::NameValue(i) => {
+                    if i.ident == "enable" {
+                        Some(i.lit)
+                    } else {
+                        None
+                    }
+                }
+                _ => None,
+            }
+        })
         .next()
         .expect(&format!("failed to find target_feature for {}", name))
 }
diff --git a/stdsimd-verify/tests/x86-intel.rs b/stdsimd-verify/tests/x86-intel.rs
@@ -22,51 +22,37 @@ struct Function {
 
 static BOOL: Type = Type::Bool;
 static F32: Type = Type::PrimFloat(32);
-static F32x4: Type = Type::Float(32, 4);
-static F32x8: Type = Type::Float(32, 8);
 static F64: Type = Type::PrimFloat(64);
-static F64x2: Type = Type::Float(64, 2);
-static F64x4: Type = Type::Float(64, 4);
 static I16: Type = Type::PrimSigned(16);
-static I16x16: Type = Type::Signed(16, 16);
-static I16x4: Type = Type::Signed(16, 4);
-static I16x8: Type = Type::Signed(16, 8);
 static I32: Type = Type::PrimSigned(32);
-static I32x2: Type = Type::Signed(32, 2);
-static I32x4: Type = Type::Signed(32, 4);
-static I32x8: Type = Type::Signed(32, 8);
 static I64: Type = Type::PrimSigned(64);
-static I64x2: Type = Type::Signed(64, 2);
-static I64x4: Type = Type::Signed(64, 4);
 static I8: Type = Type::PrimSigned(8);
-static I8x16: Type = Type::Signed(8, 16);
-static I8x32: Type = Type::Signed(8, 32);
-static I8x8: Type = Type::Signed(8, 8);
 static U16: Type = Type::PrimUnsigned(16);
-static U16x16: Type = Type::Unsigned(16, 16);
-// static U16x4: Type = Type::Unsigned(16, 4);
-static U16x8: Type = Type::Unsigned(16, 8);
 static U32: Type = Type::PrimUnsigned(32);
-static U32x2: Type = Type::Unsigned(32, 2);
-static U32x4: Type = Type::Unsigned(32, 4);
-static U32x8: Type = Type::Unsigned(32, 8);
 static U64: Type = Type::PrimUnsigned(64);
-static U64x2: Type = Type::Unsigned(64, 2);
-static U64x4: Type = Type::Unsigned(64, 4);
 static U8: Type = Type::PrimUnsigned(8);
-static U8x16: Type = Type::Unsigned(8, 16);
-static U8x32: Type = Type::Unsigned(8, 32);
-// static U8x8: Type = Type::Unsigned(8, 8);
+
+static M64: Type = Type::M64;
+static M128: Type = Type::M128;
+static M128I: Type = Type::M128I;
+static M128D: Type = Type::M128D;
+static M256: Type = Type::M256;
+static M256I: Type = Type::M256I;
+static M256D: Type = Type::M256D;
 
 #[derive(Debug)]
 enum Type {
-    Float(u8, u8),
     PrimFloat(u8),
     PrimSigned(u8),
     PrimUnsigned(u8),
     Ptr(&'static Type),
-    Signed(u8, u8),
-    Unsigned(u8, u8),
+    M64,
+    M128,
+    M128D,
+    M128I,
+    M256,
+    M256D,
+    M256I,
     Bool,
 }
 
@@ -271,33 +257,22 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
         (&Type::Ptr(&Type::PrimUnsigned(8)), "const void*") => {}
         (&Type::Ptr(&Type::PrimUnsigned(8)), "void*") => {}
 
-        (&Type::Signed(a, b), "__m128i")
-        | (&Type::Unsigned(a, b), "__m128i")
-        | (&Type::Ptr(&Type::Signed(a, b)), "__m128i*")
-        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m128i*") if a * b == 128 => {}
-
-        (&Type::Signed(a, b), "__m256i")
-        | (&Type::Unsigned(a, b), "__m256i")
-        | (&Type::Ptr(&Type::Signed(a, b)), "__m256i*")
-        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m256i*")
-            if (a as u32) * (b as u32) == 256 => {}
-
-        (&Type::Signed(a, b), "__m64")
-        | (&Type::Unsigned(a, b), "__m64")
-        | (&Type::Ptr(&Type::Signed(a, b)), "__m64*")
-        | (&Type::Ptr(&Type::Unsigned(a, b)), "__m64*") if a * b == 64 => {}
-
-        (&Type::Float(32, 4), "__m128") => {}
-        (&Type::Ptr(&Type::Float(32, 4)), "__m128*") => {}
-
-        (&Type::Float(64, 2), "__m128d") => {}
-        (&Type::Ptr(&Type::Float(64, 2)), "__m128d*") => {}
+        (&Type::M64, "__m64")
+        | (&Type::Ptr(&Type::M64), "__m64*") => {}
 
-        (&Type::Float(32, 8), "__m256") => {}
-        (&Type::Ptr(&Type::Float(32, 8)), "__m256*") => {}
+        (&Type::M128I, "__m128i")
+        | (&Type::Ptr(&Type::M128I), "__m128i*")
+        | (&Type::M128D, "__m128d")
+        | (&Type::Ptr(&Type::M128D), "__m128d*")
+        | (&Type::M128, "__m128")
+        | (&Type::Ptr(&Type::M128), "__m128*") => {}
 
-        (&Type::Float(64, 4), "__m256d") => {}
-        (&Type::Ptr(&Type::Float(64, 4)), "__m256d*") => {}
+        (&Type::M256I, "__m256i")
+        | (&Type::Ptr(&Type::M256I), "__m256i*")
+        | (&Type::M256D, "__m256d")
+        | (&Type::Ptr(&Type::M256D), "__m256d*")
+        | (&Type::M256, "__m256")
+        | (&Type::Ptr(&Type::M256), "__m256*") => {}
 
         // These two intrinsics return a 16-bit element but in Intel's
         // intrinsics they're listed as returning an `int`.
@@ -311,7 +286,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str) {
         // This is a macro (?) in C which seems to mutate its arguments, but
         // that means that we're taking pointers to arguments in rust
         // as we're not exposing it as a macro.
-        (&Type::Ptr(&Type::Float(32, 4)), "__m128")
+        (&Type::Ptr(&Type::M128), "__m128")
             if intrinsic == "_MM_TRANSPOSE4_PS" => {}
 
         // These intrinsics return an `int` in C but they're always either the