Skip to content

Commit

Permalink
Rework the ISA flag checking extractors for x64 (bytecodealliance#4878)
Browse files Browse the repository at this point in the history
Using fallible extractors that produce no values for flag checks means
that it's not possible to pattern match on cases where those flags are
false. This change reworks the existing flag-checking extractors to be
infallible, returning the flag's boolean value from the context instead,
so that rules can match on either value of a flag.
  • Loading branch information
elliottt authored Sep 7, 2022
1 parent f063082 commit caad148
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 114 deletions.
40 changes: 20 additions & 20 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1386,35 +1386,35 @@

;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl avx512vl_enabled () Type)
(extern extractor avx512vl_enabled avx512vl_enabled)
(decl avx512vl_enabled (bool) Type)
(extern extractor infallible avx512vl_enabled avx512vl_enabled)

(decl avx512dq_enabled () Type)
(extern extractor avx512dq_enabled avx512dq_enabled)
(decl avx512dq_enabled (bool) Type)
(extern extractor infallible avx512dq_enabled avx512dq_enabled)

(decl avx512f_enabled () Type)
(extern extractor avx512f_enabled avx512f_enabled)
(decl avx512f_enabled (bool) Type)
(extern extractor infallible avx512f_enabled avx512f_enabled)

(decl avx512bitalg_enabled () Type)
(extern extractor avx512bitalg_enabled avx512bitalg_enabled)
(decl avx512bitalg_enabled (bool) Type)
(extern extractor infallible avx512bitalg_enabled avx512bitalg_enabled)

(decl avx512vbmi_enabled () Type)
(extern extractor avx512vbmi_enabled avx512vbmi_enabled)
(decl avx512vbmi_enabled (bool) Type)
(extern extractor infallible avx512vbmi_enabled avx512vbmi_enabled)

(decl use_lzcnt () Type)
(extern extractor use_lzcnt use_lzcnt)
(decl use_lzcnt (bool) Type)
(extern extractor infallible use_lzcnt use_lzcnt)

(decl use_bmi1 () Type)
(extern extractor use_bmi1 use_bmi1)
(decl use_bmi1 (bool) Type)
(extern extractor infallible use_bmi1 use_bmi1)

(decl use_popcnt () Type)
(extern extractor use_popcnt use_popcnt)
(decl use_popcnt (bool) Type)
(extern extractor infallible use_popcnt use_popcnt)

(decl use_fma () Type)
(extern extractor use_fma use_fma)
(decl use_fma (bool) Type)
(extern extractor infallible use_fma use_fma)

(decl use_sse41 () Type)
(extern extractor use_sse41 use_sse41)
(decl use_sse41 (bool) Type)
(extern extractor infallible use_sse41 use_sse41)

;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
68 changes: 34 additions & 34 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -939,8 +939,8 @@

;; With AVX-512 we can implement `i64x2` multiplication with a single
;; instruction.
(rule (lower (has_type (and (avx512vl_enabled)
(avx512dq_enabled)
(rule (lower (has_type (and (avx512vl_enabled $true)
(avx512dq_enabled $true)
(multi_lane 64 2))
(imul x y)))
(x64_vpmullq x y))
Expand Down Expand Up @@ -1167,8 +1167,8 @@
(x64_pabsd x))

;; When AVX512 is available, we can use a single `vpabsq` instruction.
(rule (lower (has_type (and (avx512vl_enabled)
(avx512f_enabled)
(rule (lower (has_type (and (avx512vl_enabled $true)
(avx512f_enabled $true)
$I64X2)
(iabs x)))
(x64_vpabsq x))
Expand Down Expand Up @@ -1733,7 +1733,7 @@
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_lzcnt))
(use_lzcnt $true))
(clz src)))
(x64_lzcnt ty src))

Expand Down Expand Up @@ -1775,7 +1775,7 @@
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_bmi1))
(use_bmi1 $true))
(ctz src)))
(x64_tzcnt ty src))

Expand Down Expand Up @@ -1811,21 +1811,21 @@
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_popcnt))
(use_popcnt $true))
(popcnt src)))
(x64_popcnt ty src))

(rule 1 (lower
(has_type (and
(ty_8_or_16 ty)
(use_popcnt))
(use_popcnt $true))
(popcnt src)))
(x64_popcnt $I32 (extend_to_gpr src $I32 (ExtendKind.Zero))))

(rule 1 (lower
(has_type (and
$I128
(use_popcnt))
(use_popcnt $true))
(popcnt src)))
(let ((lo_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 0)))
(hi_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 1))))
Expand Down Expand Up @@ -1916,8 +1916,8 @@

(rule 1 (lower (has_type (and
$I8X16
(avx512vl_enabled)
(avx512bitalg_enabled))
(avx512vl_enabled $true)
(avx512bitalg_enabled $true))
(popcnt src)))
(x64_vpopcntb src))

Expand Down Expand Up @@ -2480,13 +2480,13 @@
(libcall_3 (LibCall.FmaF32) x y z))
(rule (lower (has_type $F64 (fma x y z)))
(libcall_3 (LibCall.FmaF64) x y z))
(rule 1 (lower (has_type (and (use_fma) $F32) (fma x y z)))
(rule 1 (lower (has_type (and (use_fma $true) $F32) (fma x y z)))
(x64_vfmadd213ss x y z))
(rule 1 (lower (has_type (and (use_fma) $F64) (fma x y z)))
(rule 1 (lower (has_type (and (use_fma $true) $F64) (fma x y z)))
(x64_vfmadd213sd x y z))
(rule (lower (has_type (and (use_fma) $F32X4) (fma x y z)))
(rule (lower (has_type (and (use_fma $true) $F32X4) (fma x y z)))
(x64_vfmadd213ps x y z))
(rule (lower (has_type (and (use_fma) $F64X2) (fma x y z)))
(rule (lower (has_type (and (use_fma $true) $F64X2) (fma x y z)))
(x64_vfmadd213pd x y z))

;; Rules for `load*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down Expand Up @@ -2993,7 +2993,7 @@
;;
;; NOTE: the priority of 1 here is to break ties with the next case for $F32X4,
;; as it doesn't require either of the avx512 extensions to be enabled.
(rule 1 (lower (has_type (and (avx512vl_enabled) (avx512f_enabled) $F32X4)
(rule 1 (lower (has_type (and (avx512vl_enabled $true) (avx512f_enabled $true) $F32X4)
(fcvt_from_uint src)))
(x64_vcvtudq2ps src))

Expand Down Expand Up @@ -3332,82 +3332,82 @@

;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32))))
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32))))
(x64_roundss a (RoundImm.RoundUp)))

(rule (lower (ceil a @ (value_type $F32)))
(libcall_1 (LibCall.CeilF32) a))

(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64))))
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64))))
(x64_roundsd a (RoundImm.RoundUp)))

(rule (lower (ceil a @ (value_type $F64)))
(libcall_1 (LibCall.CeilF64) a))

(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F32X4))))
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F32X4))))
(x64_roundps a (RoundImm.RoundUp)))

(rule (lower (has_type (use_sse41) (ceil a @ (value_type $F64X2))))
(rule (lower (has_type (use_sse41 $true) (ceil a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundUp)))

;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32))))
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32))))
(x64_roundss a (RoundImm.RoundDown)))

(rule (lower (floor a @ (value_type $F32)))
(libcall_1 (LibCall.FloorF32) a))

(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64))))
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64))))
(x64_roundsd a (RoundImm.RoundDown)))

(rule (lower (floor a @ (value_type $F64)))
(libcall_1 (LibCall.FloorF64) a))

(rule (lower (has_type (use_sse41) (floor a @ (value_type $F32X4))))
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F32X4))))
(x64_roundps a (RoundImm.RoundDown)))

(rule (lower (has_type (use_sse41) (floor a @ (value_type $F64X2))))
(rule (lower (has_type (use_sse41 $true) (floor a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundDown)))

;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32))))
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32))))
(x64_roundss a (RoundImm.RoundNearest)))

(rule (lower (nearest a @ (value_type $F32)))
(libcall_1 (LibCall.NearestF32) a))

(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64))))
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64))))
(x64_roundsd a (RoundImm.RoundNearest)))

(rule (lower (nearest a @ (value_type $F64)))
(libcall_1 (LibCall.NearestF64) a))

(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F32X4))))
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F32X4))))
(x64_roundps a (RoundImm.RoundNearest)))

(rule (lower (has_type (use_sse41) (nearest a @ (value_type $F64X2))))
(rule (lower (has_type (use_sse41 $true) (nearest a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundNearest)))

;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32))))
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32))))
(x64_roundss a (RoundImm.RoundZero)))

(rule (lower (trunc a @ (value_type $F32)))
(libcall_1 (LibCall.TruncF32) a))

(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64))))
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64))))
(x64_roundsd a (RoundImm.RoundZero)))

(rule (lower (trunc a @ (value_type $F64)))
(libcall_1 (LibCall.TruncF64) a))

(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F32X4))))
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F32X4))))
(x64_roundps a (RoundImm.RoundZero)))

(rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2))))
(rule (lower (has_type (use_sse41 $true) (trunc a @ (value_type $F64X2))))
(x64_roundpd a (RoundImm.RoundZero)))

;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down Expand Up @@ -3506,7 +3506,7 @@
;; For the case where the shuffle mask contains out-of-bounds values (values
;; greater than 31) we must mask off those resulting values in the result of
;; `vpermi2b`.
(rule (lower (has_type (and (avx512vl_enabled) (avx512vbmi_enabled))
(rule (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true))
(shuffle a b (vec_mask_from_immediate
(perm_from_mask_with_zeros mask zeros)))))
(x64_andps
Expand All @@ -3515,7 +3515,7 @@

;; However, if the shuffle mask contains no out-of-bounds values, we can use
;; `vpermi2b` without any masking.
(rule (lower (has_type (and (avx512vl_enabled) (avx512vbmi_enabled))
(rule (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true))
(shuffle a b (vec_mask_from_immediate mask))))
(x64_vpermi2b b a (x64_xmm_load_const $I8X16 (perm_from_mask mask))))

Expand Down
80 changes: 20 additions & 60 deletions cranelift/codegen/src/isa/x64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,93 +209,53 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
}

#[inline]
fn avx512vl_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512vl_simd() {
Some(())
} else {
None
}
fn avx512vl_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512vl_simd()
}

#[inline]
fn avx512dq_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512dq_simd() {
Some(())
} else {
None
}
fn avx512dq_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512dq_simd()
}

#[inline]
fn avx512f_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512f_simd() {
Some(())
} else {
None
}
fn avx512f_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512f_simd()
}

#[inline]
fn avx512bitalg_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512bitalg_simd() {
Some(())
} else {
None
}
fn avx512bitalg_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512bitalg_simd()
}

#[inline]
fn avx512vbmi_enabled(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_avx512vbmi_simd() {
Some(())
} else {
None
}
fn avx512vbmi_enabled(&mut self, _: Type) -> bool {
self.isa_flags.use_avx512vbmi_simd()
}

#[inline]
fn use_lzcnt(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_lzcnt() {
Some(())
} else {
None
}
fn use_lzcnt(&mut self, _: Type) -> bool {
self.isa_flags.use_lzcnt()
}

#[inline]
fn use_bmi1(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_bmi1() {
Some(())
} else {
None
}
fn use_bmi1(&mut self, _: Type) -> bool {
self.isa_flags.use_bmi1()
}

#[inline]
fn use_popcnt(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_popcnt() {
Some(())
} else {
None
}
fn use_popcnt(&mut self, _: Type) -> bool {
self.isa_flags.use_popcnt()
}

#[inline]
fn use_fma(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_fma() {
Some(())
} else {
None
}
fn use_fma(&mut self, _: Type) -> bool {
self.isa_flags.use_fma()
}

#[inline]
fn use_sse41(&mut self, _: Type) -> Option<()> {
if self.isa_flags.use_sse41() {
Some(())
} else {
None
}
fn use_sse41(&mut self, _: Type) -> bool {
self.isa_flags.use_sse41()
}

#[inline]
Expand Down

0 comments on commit caad148

Please sign in to comment.