Skip to content

Commit 772ce73

Browse files
committed
Add x86_pblendw instruction
This instruction is necessary for lowering `fcvt_from_uint`.
1 parent 546fc9d commit 772ce73

File tree

5 files changed

+35
-0
lines changed

5 files changed

+35
-0
lines changed

cranelift/codegen/meta/src/isa/x86/encodings.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,6 +1639,7 @@ fn define_simd(
16391639
let x86_movlhps = x86.by_name("x86_movlhps");
16401640
let x86_movsd = x86.by_name("x86_movsd");
16411641
let x86_packss = x86.by_name("x86_packss");
1642+
let x86_pblendw = x86.by_name("x86_pblendw");
16421643
let x86_pextr = x86.by_name("x86_pextr");
16431644
let x86_pinsr = x86.by_name("x86_pinsr");
16441645
let x86_pmaxs = x86.by_name("x86_pmaxs");
@@ -1744,6 +1745,13 @@ fn define_simd(
17441745
e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
17451746
}
17461747

1748+
// PBLENDW, select lanes using a u8 immediate.
1749+
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
1750+
let instruction = x86_pblendw.bind(vector(ty, sse_vector_size));
1751+
let template = rec_fa_ib.opcodes(&PBLENDW);
1752+
e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
1753+
}
1754+
17471755
// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
17481756
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
17491757
// written to the low doubleword of the register and the register is zero-extended to 128 bits."

cranelift/codegen/meta/src/isa/x86/instructions.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,20 @@ pub(crate) fn define(
333333
.operands_out(vec![a]),
334334
);
335335

336+
let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b");
337+
ig.push(
338+
Inst::new(
339+
"x86_pblendw",
340+
r#"
341+
Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a
342+
lane in ``b``: if the bit is set, the lane is copied into ``a``.
343+
"#,
344+
&formats.ternary_imm8,
345+
)
346+
.operands_in(vec![a, b, mask])
347+
.operands_out(vec![a]),
348+
);
349+
336350
let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index");
337351
let x = &Operand::new("x", TxN);
338352
let a = &Operand::new("a", &TxN.lane_of());

cranelift/codegen/meta/src/isa/x86/opcodes.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,10 @@ pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];
347347
/// in XMM0 and store the values into xmm1 (SSE4.1).
348348
pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
349349

350+
/// Select each word from either xmm1 or xmm2/m128 according to the mask in imm8 and store the
351+
/// result in xmm1 (SSE4.1).
352+
pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e];
353+
350354
/// Compare packed data for equal (SSE2).
351355
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
352356

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2046,6 +2046,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
20462046
| Opcode::X86Pop
20472047
| Opcode::X86Bsr
20482048
| Opcode::X86Bsf
2049+
| Opcode::X86Pblendw
20492050
| Opcode::X86Pshufd
20502051
| Opcode::X86Pshufb
20512052
| Opcode::X86Pextr

cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,14 @@ block0:
9696
return
9797
}
9898

99+
;; blend
100+
101+
function %pblendw(b16x8, b16x8) {
102+
block0(v0: b16x8 [%xmm10], v1: b16x8 [%xmm2]):
103+
[-, %xmm10] v2 = x86_pblendw v0, v1, 0x55 ; bin: 66 44 0f 3a 0e d2 55
104+
return
105+
}
106+
99107
;; pack/unpack
100108

101109
function %unpack_high_i8x16(i8x16, i8x16) {

0 commit comments

Comments
 (0)