From caad14826c757ed22499cc820eaf45b4bbe3493e Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 7 Sep 2022 13:49:35 -0700 Subject: [PATCH] Rework the ISA flag checking extractors for x64 (#4878) Using fallible extractors that produce no values for flag checks means that it's not possible to pattern match cases where those flags are false. This change reworks the existing flag-checking extractors to be infallible, returning the flag's boolean value from the context instead. --- cranelift/codegen/src/isa/x64/inst.isle | 40 +++++------ cranelift/codegen/src/isa/x64/lower.isle | 68 +++++++++--------- cranelift/codegen/src/isa/x64/lower/isle.rs | 80 ++++++--------------- 3 files changed, 74 insertions(+), 114 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index c3a4c65bc07f..af95202647d7 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1386,35 +1386,35 @@ ;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl avx512vl_enabled () Type) -(extern extractor avx512vl_enabled avx512vl_enabled) +(decl avx512vl_enabled (bool) Type) +(extern extractor infallible avx512vl_enabled avx512vl_enabled) -(decl avx512dq_enabled () Type) -(extern extractor avx512dq_enabled avx512dq_enabled) +(decl avx512dq_enabled (bool) Type) +(extern extractor infallible avx512dq_enabled avx512dq_enabled) -(decl avx512f_enabled () Type) -(extern extractor avx512f_enabled avx512f_enabled) +(decl avx512f_enabled (bool) Type) +(extern extractor infallible avx512f_enabled avx512f_enabled) -(decl avx512bitalg_enabled () Type) -(extern extractor avx512bitalg_enabled avx512bitalg_enabled) +(decl avx512bitalg_enabled (bool) Type) +(extern extractor infallible avx512bitalg_enabled avx512bitalg_enabled) -(decl avx512vbmi_enabled () Type) -(extern extractor avx512vbmi_enabled avx512vbmi_enabled) +(decl avx512vbmi_enabled (bool) Type) +(extern extractor infallible avx512vbmi_enabled avx512vbmi_enabled) -(decl use_lzcnt () Type) -(extern extractor use_lzcnt use_lzcnt) +(decl use_lzcnt (bool) Type) +(extern extractor infallible use_lzcnt use_lzcnt) -(decl use_bmi1 () Type) -(extern extractor use_bmi1 use_bmi1) +(decl use_bmi1 (bool) Type) +(extern extractor infallible use_bmi1 use_bmi1) -(decl use_popcnt () Type) -(extern extractor use_popcnt use_popcnt) +(decl use_popcnt (bool) Type) +(extern extractor infallible use_popcnt use_popcnt) -(decl use_fma () Type) -(extern extractor use_fma use_fma) +(decl use_fma (bool) Type) +(extern extractor infallible use_fma use_fma) -(decl use_sse41 () Type) -(extern extractor use_sse41 use_sse41) +(decl use_sse41 (bool) Type) +(extern extractor infallible use_sse41 use_sse41) ;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index b86c2fe79214..72b82d271c6d 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -939,8 +939,8 @@ ;; With AVX-512 we can implement `i64x2` multiplication with a single ;; instruction. -(rule (lower (has_type (and (avx512vl_enabled) - (avx512dq_enabled) +(rule (lower (has_type (and (avx512vl_enabled $true) + (avx512dq_enabled $true) (multi_lane 64 2)) (imul x y))) (x64_vpmullq x y)) @@ -1167,8 +1167,8 @@ (x64_pabsd x)) ;; When AVX512 is available, we can use a single `vpabsq` instruction. 
-(rule (lower (has_type (and (avx512vl_enabled) - (avx512f_enabled) +(rule (lower (has_type (and (avx512vl_enabled $true) + (avx512f_enabled $true) $I64X2) (iabs x))) (x64_vpabsq x)) @@ -1733,7 +1733,7 @@ (rule 1 (lower (has_type (and (ty_32_or_64 ty) - (use_lzcnt)) + (use_lzcnt $true)) (clz src))) (x64_lzcnt ty src)) @@ -1775,7 +1775,7 @@ (rule 1 (lower (has_type (and (ty_32_or_64 ty) - (use_bmi1)) + (use_bmi1 $true)) (ctz src))) (x64_tzcnt ty src)) @@ -1811,21 +1811,21 @@ (rule 1 (lower (has_type (and (ty_32_or_64 ty) - (use_popcnt)) + (use_popcnt $true)) (popcnt src))) (x64_popcnt ty src)) (rule 1 (lower (has_type (and (ty_8_or_16 ty) - (use_popcnt)) + (use_popcnt $true)) (popcnt src))) (x64_popcnt $I32 (extend_to_gpr src $I32 (ExtendKind.Zero)))) (rule 1 (lower (has_type (and $I128 - (use_popcnt)) + (use_popcnt $true)) (popcnt src))) (let ((lo_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 0))) (hi_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 1)))) @@ -1916,8 +1916,8 @@ (rule 1 (lower (has_type (and $I8X16 - (avx512vl_enabled) - (avx512bitalg_enabled)) + (avx512vl_enabled $true) + (avx512bitalg_enabled $true)) (popcnt src))) (x64_vpopcntb src)) @@ -2480,13 +2480,13 @@ (libcall_3 (LibCall.FmaF32) x y z)) (rule (lower (has_type $F64 (fma x y z))) (libcall_3 (LibCall.FmaF64) x y z)) -(rule 1 (lower (has_type (and (use_fma) $F32) (fma x y z))) +(rule 1 (lower (has_type (and (use_fma $true) $F32) (fma x y z))) (x64_vfmadd213ss x y z)) -(rule 1 (lower (has_type (and (use_fma) $F64) (fma x y z))) +(rule 1 (lower (has_type (and (use_fma $true) $F64) (fma x y z))) (x64_vfmadd213sd x y z)) -(rule (lower (has_type (and (use_fma) $F32X4) (fma x y z))) +(rule (lower (has_type (and (use_fma $true) $F32X4) (fma x y z))) (x64_vfmadd213ps x y z)) -(rule (lower (has_type (and (use_fma) $F64X2) (fma x y z))) +(rule (lower (has_type (and (use_fma $true) $F64X2) (fma x y z))) (x64_vfmadd213pd x y z)) ;; Rules for `load*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2993,7 +2993,7 @@ ;; ;; NOTE: the priority of 1 here is to break ties with the next case for $F32X4, ;; as it doesn't require either of the avx512 extensions to be enabled. 
-(rule 1 (lower (has_type (and (avx512vl_enabled) (avx512f_enabled) $F32X4) +(rule 1 (lower (has_type (and (avx512vl_enabled $true) (avx512f_enabled $true) $F32X4) (fcvt_from_uint src))) (x64_vcvtudq2ps src)) @@ -3332,82 +3332,82 @@ ;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32)))) +(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32)))) (x64_roundss a (RoundImm.RoundUp))) (rule (lower (ceil a @ (value_type $F32))) (libcall_1 (LibCall.CeilF32) a)) -(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64)))) +(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64)))) (x64_roundsd a (RoundImm.RoundUp))) (rule (lower (ceil a @ (value_type $F64))) (libcall_1 (LibCall.CeilF64) a)) -(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32X4)))) +(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32X4)))) (x64_roundps a (RoundImm.RoundUp))) -(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64X2)))) +(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64X2)))) (x64_roundpd a (RoundImm.RoundUp))) ;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32)))) +(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32)))) (x64_roundss a (RoundImm.RoundDown))) (rule (lower (floor a @ (value_type $F32))) (libcall_1 (LibCall.FloorF32) a)) -(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64)))) +(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64)))) (x64_roundsd a (RoundImm.RoundDown))) (rule (lower (floor a @ (value_type $F64))) (libcall_1 (LibCall.FloorF64) a)) -(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32X4)))) +(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32X4)))) (x64_roundps a (RoundImm.RoundDown))) -(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64X2)))) +(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64X2)))) (x64_roundpd a (RoundImm.RoundDown))) ;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32)))) +(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32)))) (x64_roundss a (RoundImm.RoundNearest))) (rule (lower (nearest a @ (value_type $F32))) (libcall_1 (LibCall.NearestF32) a)) -(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64)))) +(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64)))) (x64_roundsd a (RoundImm.RoundNearest))) (rule (lower (nearest a @ (value_type $F64))) (libcall_1 (LibCall.NearestF64) a)) -(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32X4)))) +(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32X4)))) (x64_roundps a (RoundImm.RoundNearest))) -(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64X2)))) +(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64X2)))) (x64_roundpd a (RoundImm.RoundNearest))) ;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32)))) +(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32)))) (x64_roundss a (RoundImm.RoundZero))) (rule (lower (trunc a @ (value_type $F32))) (libcall_1 (LibCall.TruncF32) a)) -(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64)))) 
+(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64)))) (x64_roundsd a (RoundImm.RoundZero))) (rule (lower (trunc a @ (value_type $F64))) (libcall_1 (LibCall.TruncF64) a)) -(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32X4)))) +(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32X4)))) (x64_roundps a (RoundImm.RoundZero))) -(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2)))) +(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64X2)))) (x64_roundpd a (RoundImm.RoundZero))) ;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3506,7 +3506,7 @@ ;; For the case where the shuffle mask contains out-of-bounds values (values ;; greater than 31) we must mask off those resulting values in the result of ;; `vpermi2b`. -(rule (lower (has_type (and (avx512vl_enabled) (avx512vbmi_enabled)) +(rule (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true)) (shuffle a b (vec_mask_from_immediate (perm_from_mask_with_zeros mask zeros))))) (x64_andps @@ -3515,7 +3515,7 @@ ;; However, if the shuffle mask contains no out-of-bounds values, we can use ;; `vpermi2b` without any masking. -(rule (lower (has_type (and (avx512vl_enabled) (avx512vbmi_enabled)) +(rule (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true)) (shuffle a b (vec_mask_from_immediate mask)))) (x64_vpermi2b b a (x64_xmm_load_const $I8X16 (perm_from_mask mask)))) diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 66e238165326..9c5d7476d903 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -209,93 +209,53 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } #[inline] - fn avx512vl_enabled(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_avx512vl_simd() { - Some(()) - } else { - None - } + fn avx512vl_enabled(&mut self, _: Type) -> bool { + self.isa_flags.use_avx512vl_simd() } #[inline] - fn avx512dq_enabled(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_avx512dq_simd() { - Some(()) - } else { - None - } + fn avx512dq_enabled(&mut self, _: Type) -> bool { + self.isa_flags.use_avx512dq_simd() } #[inline] - fn avx512f_enabled(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_avx512f_simd() { - Some(()) - } else { - None - } + fn avx512f_enabled(&mut self, _: Type) -> bool { + self.isa_flags.use_avx512f_simd() } #[inline] - fn avx512bitalg_enabled(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_avx512bitalg_simd() { - Some(()) - } else { - None - } + fn avx512bitalg_enabled(&mut self, _: Type) -> bool { + self.isa_flags.use_avx512bitalg_simd() } #[inline] - fn avx512vbmi_enabled(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_avx512vbmi_simd() { - Some(()) - } else { - None - } + fn avx512vbmi_enabled(&mut self, _: Type) -> bool { + self.isa_flags.use_avx512vbmi_simd() } #[inline] - fn use_lzcnt(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_lzcnt() { - Some(()) - } else { - None - } + fn use_lzcnt(&mut self, _: Type) -> bool { + self.isa_flags.use_lzcnt() } #[inline] - fn use_bmi1(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_bmi1() { - Some(()) - } else { - None - } + fn use_bmi1(&mut self, _: Type) -> bool { + self.isa_flags.use_bmi1() } #[inline] - fn use_popcnt(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_popcnt() { - Some(()) - } else { - None - } + fn use_popcnt(&mut 
self, _: Type) -> bool { + self.isa_flags.use_popcnt() } #[inline] - fn use_fma(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_fma() { - Some(()) - } else { - None - } + fn use_fma(&mut self, _: Type) -> bool { + self.isa_flags.use_fma() } #[inline] - fn use_sse41(&mut self, _: Type) -> Option<()> { - if self.isa_flags.use_sse41() { - Some(()) - } else { - None - } + fn use_sse41(&mut self, _: Type) -> bool { + self.isa_flags.use_sse41() } #[inline]
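---

Note (illustrative, not part of the patch above): the point of making these extractors infallible is that a rule can now also match when a flag is *disabled*, not only when it is enabled. A minimal sketch of the kind of rule this unlocks; `popcnt_fallback` is a hypothetical constructor standing in for a non-POPCNT lowering and does not exist in this change, while `use_popcnt`, `ty_32_or_64`, and the `$false` boolean literal follow the forms used in the patch:

    ;; Sketch only: with the infallible extractor, `$false` becomes a
    ;; matchable pattern, so a rule can explicitly target the case where
    ;; the popcnt ISA flag is off and route to a fallback lowering.
    (rule (lower (has_type (and (ty_32_or_64 ty)
                                (use_popcnt $false))
                           (popcnt src)))
          (popcnt_fallback ty src))

The patch itself keeps the existing rule structure (flag-guarded rules alongside unguarded fallbacks); the `$false` form is simply the pattern that the previously fallible, value-less extractors could not express.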