Skip to content

Commit 9d6623e

Browse files
committed
Auto merge of #3492 - eduardosm:intrinsics-x86-avx2, r=oli-obk
Implement LLVM x86 AVX2 intrinsics
2 parents e989fe7 + a79b1f1 commit 9d6623e

File tree

8 files changed

+2474
-257
lines changed

8 files changed

+2474
-257
lines changed

clippy.toml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
arithmetic-side-effects-allowed = ["rustc_target::abi::Size"]

src/shims/x86/avx.rs

+2-69
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ use rustc_target::spec::abi::Abi;
77

88
use super::{
99
bin_op_simd_float_all, conditional_dot_product, convert_float_to_int, horizontal_bin_op,
10-
round_all, test_bits_masked, test_high_bits_masked, unary_op_ps, FloatBinOp, FloatUnaryOp,
10+
mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
11+
FloatBinOp, FloatUnaryOp,
1112
};
1213
use crate::*;
1314
use shims::foreign_items::EmulateForeignItemResult;
@@ -347,71 +348,3 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
347348
Ok(EmulateForeignItemResult::NeedsJumping)
348349
}
349350
}
350-
351-
/// Conditionally loads from `ptr` according to the high bit of each
352-
/// element of `mask`. `ptr` does not need to be aligned.
///
/// `mask` and `dest` are both viewed as SIMD vectors and must have the same
/// number of elements (this is asserted). For each index `i`, if the most
/// significant bit of `mask[i]` is set, `dest[i]` is filled by copying
/// `dest[i]`'s size in bytes from `ptr` offset by `i` such elements;
/// otherwise `dest[i]` is set to 0.
353-
fn mask_load<'tcx>(
354-
this: &mut crate::MiriInterpCx<'_, 'tcx>,
355-
ptr: &OpTy<'tcx, Provenance>,
356-
mask: &OpTy<'tcx, Provenance>,
357-
dest: &MPlaceTy<'tcx, Provenance>,
358-
) -> InterpResult<'tcx, ()> {
359-
let (mask, mask_len) = this.operand_to_simd(mask)?;
360-
let (dest, dest_len) = this.mplace_to_simd(dest)?;
361-
362-
assert_eq!(dest_len, mask_len);
363-
364-
// Size of a single mask element; `high_bit_offset` is the index of its
// most significant bit (bit width minus one).
let mask_item_size = mask.layout.field(this, 0).size;
365-
let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
366-
367-
let ptr = this.read_pointer(ptr)?;
368-
for i in 0..dest_len {
369-
let mask = this.project_index(&mask, i)?;
370-
let dest = this.project_index(&dest, i)?;
371-
372-
// Shifting the unsigned mask element right by `high_bit_offset` leaves
// only its top bit, so a non-zero result means the element is selected.
if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
373-
// Size * u64 is implemented as always checked
374-
#[allow(clippy::arithmetic_side_effects)]
375-
let ptr = ptr.wrapping_offset(dest.layout.size * i, &this.tcx);
376-
// Unaligned copy, which is what we want.
377-
this.mem_copy(ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
378-
} else {
379-
// Element not selected by the mask: the destination element is zeroed.
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
380-
}
381-
}
382-
383-
Ok(())
384-
}
385-
386-
/// Conditionally stores into `ptr` according to the high bit of each
387-
/// element of `mask`. `ptr` does not need to be aligned.
///
/// `mask` and `value` are both viewed as SIMD vectors and must have the same
/// number of elements (this is asserted). For each index `i`, if the most
/// significant bit of `mask[i]` is set, `value[i]` is copied to `ptr` offset
/// by `i` elements of `value[i]`'s size; otherwise nothing is written for
/// that element.
388-
fn mask_store<'tcx>(
389-
this: &mut crate::MiriInterpCx<'_, 'tcx>,
390-
ptr: &OpTy<'tcx, Provenance>,
391-
mask: &OpTy<'tcx, Provenance>,
392-
value: &OpTy<'tcx, Provenance>,
393-
) -> InterpResult<'tcx, ()> {
394-
let (mask, mask_len) = this.operand_to_simd(mask)?;
395-
let (value, value_len) = this.operand_to_simd(value)?;
396-
397-
assert_eq!(value_len, mask_len);
398-
399-
// Size of a single mask element; `high_bit_offset` is the index of its
// most significant bit (bit width minus one).
let mask_item_size = mask.layout.field(this, 0).size;
400-
let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
401-
402-
let ptr = this.read_pointer(ptr)?;
403-
for i in 0..value_len {
404-
let mask = this.project_index(&mask, i)?;
405-
let value = this.project_index(&value, i)?;
406-
407-
// Shifting the unsigned mask element right by `high_bit_offset` leaves
// only its top bit, so a non-zero result means the element is selected.
// There is no `else` branch: unselected elements are not written.
if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
408-
// Size * u64 is implemented as always checked
409-
#[allow(clippy::arithmetic_side_effects)]
410-
let ptr = ptr.wrapping_offset(value.layout.size * i, &this.tcx);
411-
// Unaligned copy, which is what we want.
412-
this.mem_copy(value.ptr(), ptr, value.layout.size, /*nonoverlapping*/ true)?;
413-
}
414-
}
415-
416-
Ok(())
417-
}

src/shims/x86/avx2.rs

+444
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)