Make Fused Multiply-Add intrinsics require vfp4 instead of armv8.

hkratz · hkratz · commit 394404335300 · 2021-09-19T18:03:04.000Z
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -8721,7 +8721,7 @@ pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) ->
 /// Floating-point fused Multiply-Add to accumulator(vector)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
 pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
@@ -8737,7 +8737,7 @@ vfma_f32_(b, c, a)
 /// Floating-point fused Multiply-Add to accumulator(vector)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
 pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
@@ -8753,7 +8753,7 @@ vfmaq_f32_(b, c, a)
 /// Floating-point fused Multiply-Add to accumulator(vector)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
 pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
@@ -8763,7 +8763,7 @@ pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t
 /// Floating-point fused Multiply-Add to accumulator(vector)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
 pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
@@ -8773,7 +8773,7 @@ pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t
 /// Floating-point fused multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))]
 pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
@@ -8784,7 +8784,7 @@ pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float3
 /// Floating-point fused multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))]
 pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
@@ -8795,7 +8795,7 @@ pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
 /// Floating-point fused Multiply-subtract to accumulator(vector)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))]
 pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
@@ -8805,7 +8805,7 @@ pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t
 /// Floating-point fused Multiply-subtract to accumulator(vector)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))]
 pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
@@ -2733,7 +2733,7 @@ generate float64x1_t
 aarch64 = fmla
 generate float64x2_t
 
-target = fp-armv8
+target = vfp4
 arm = vfma
 link-arm = llvm.fma._EXT_
 generate float*_t
@@ -2752,7 +2752,7 @@ generate float64x1_t:float64x1_t:f64:float64x1_t
 aarch64 = fmla
 generate float64x2_t:float64x2_t:f64:float64x2_t
 
-target = fp-armv8
+target = vfp4
 arm = vfma
 generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
 
@@ -2811,7 +2811,7 @@ generate float64x1_t
 aarch64 = fmls
 generate float64x2_t
 
-target = fp-armv8
+target = vfp4
 arm = vfms
 generate float*_t
 
@@ -2829,7 +2829,7 @@ generate float64x1_t:float64x1_t:f64:float64x1_t
 aarch64 = fmls
 generate float64x2_t:float64x2_t:f64:float64x2_t
 
-target = fp-armv8
+target = vfp4
 arm = vfms
 generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
 
diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs
@@ -438,6 +438,7 @@ enum Suffix {
 enum TargetFeature {
     Default,
     ArmV7,
+    Vfp4,
     FPArmV8,
     AES,
 }
@@ -980,6 +981,7 @@ fn gen_aarch64(
     let current_target = match target {
         Default => "neon",
         ArmV7 => "v7",
+        Vfp4 => "vfp4",
         FPArmV8 => "fp-armv8,v8",
         AES => "neon,aes",
     };
@@ -1630,12 +1632,14 @@ fn gen_arm(
     let current_target_aarch64 = match target {
         Default => "neon",
         ArmV7 => "neon",
+        Vfp4 => "neon",
         FPArmV8 => "neon",
         AES => "neon,aes",
     };
     let current_target_arm = match target {
         Default => "v7",
         ArmV7 => "v7",
+        Vfp4 => "vfp4",
         FPArmV8 => "fp-armv8,v8",
         AES => "aes,v8",
     };
@@ -2854,6 +2858,7 @@ mod test {
             target = match Some(String::from(&line[9..])) {
                 Some(input) => match input.as_str() {
                     "v7" => ArmV7,
+                    "vfp4" => Vfp4,
                     "fp-armv8" => FPArmV8,
                     "aes" => AES,
                     _ => Default,