Skip to content

Replace some calls to pointer::offset with add and sub #1323

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions crates/core_arch/src/x86/avx512gfni.rs
Original file line number Diff line number Diff line change
Expand Up @@ -829,21 +829,21 @@ mod tests {
/// Test helper: loads one 128-bit word from `data` using the unaligned load
/// intrinsic `_mm_loadu_si128`.
///
/// `word_index` counts 16-byte words from the start of the slice; it is
/// converted to an element count for `T` before the pointer is advanced.
///
/// # Safety
///
/// No bounds check is performed here, so the caller must make sure the
/// addressed 16 bytes lie inside `data`. The function is compiled with the
/// `sse2` target feature enabled.
#[target_feature(enable = "sse2")]
unsafe fn load_m128i_word<T>(data: &[T], word_index: usize) -> __m128i {
// Despite the name, this is an offset in elements of `T`, not in bytes:
// pointer arithmetic on `*const T` steps by `size_of::<T>()`. The integer
// division truncates unless `size_of::<T>()` divides `word_index * 16`, and
// it panics with a division by zero for a zero-sized `T`.
let byte_offset = word_index * 16 / size_of::<T>();
// NOTE(review): the next two lines are the before/after pair of this diff
// hunk (`offset` with an `isize` cast replaced by `add` with a `usize`).
// Only the second binding is used because it shadows the first.
let pointer = data.as_ptr().offset(byte_offset as isize) as *const __m128i;
let pointer = data.as_ptr().add(byte_offset) as *const __m128i;
// The pointer is passed through `black_box` before the load — presumably to
// keep the optimizer from folding the load away in tests; confirm.
_mm_loadu_si128(black_box(pointer))
}

/// Test helper: loads one 256-bit word from `data` using the unaligned load
/// intrinsic `_mm256_loadu_si256`.
///
/// `word_index` counts 32-byte words from the start of the slice; it is
/// converted to an element count for `T` before the pointer is advanced.
///
/// # Safety
///
/// No bounds check is performed here, so the caller must make sure the
/// addressed 32 bytes lie inside `data`. The function is compiled with the
/// `avx` target feature enabled.
#[target_feature(enable = "avx")]
unsafe fn load_m256i_word<T>(data: &[T], word_index: usize) -> __m256i {
// Despite the name, this is an offset in elements of `T`, not in bytes:
// pointer arithmetic on `*const T` steps by `size_of::<T>()`. The integer
// division truncates unless `size_of::<T>()` divides `word_index * 32`, and
// it panics with a division by zero for a zero-sized `T`.
let byte_offset = word_index * 32 / size_of::<T>();
// NOTE(review): the next two lines are the before/after pair of this diff
// hunk (`offset` with an `isize` cast replaced by `add` with a `usize`).
// Only the second binding is used because it shadows the first.
let pointer = data.as_ptr().offset(byte_offset as isize) as *const __m256i;
let pointer = data.as_ptr().add(byte_offset) as *const __m256i;
// The pointer is passed through `black_box` before the load — presumably to
// keep the optimizer from folding the load away in tests; confirm.
_mm256_loadu_si256(black_box(pointer))
}

/// Test helper: loads one 512-bit word from `data` using the unaligned load
/// intrinsic `_mm512_loadu_si512`.
///
/// `word_index` counts 64-byte words from the start of the slice; it is
/// converted to an element count for `T` before the pointer is advanced.
///
/// # Safety
///
/// No bounds check is performed here, so the caller must make sure the
/// addressed 64 bytes lie inside `data`. The function is compiled with the
/// `avx512f` target feature enabled.
#[target_feature(enable = "avx512f")]
unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i {
// Despite the name, this is an offset in elements of `T`, not in bytes:
// pointer arithmetic on `*const T` steps by `size_of::<T>()`. The integer
// division truncates unless `size_of::<T>()` divides `word_index * 64`, and
// it panics with a division by zero for a zero-sized `T`.
let byte_offset = word_index * 64 / size_of::<T>();
// NOTE(review): the next two lines are the before/after pair of this diff
// hunk (`offset` with an `isize` cast replaced by `add` with a `usize`).
// Only the second binding is used because it shadows the first.
// Unlike the 128/256-bit helpers, the pointer is cast to `*const i32`
// rather than to a pointer to the vector type.
let pointer = data.as_ptr().offset(byte_offset as isize) as *const i32;
let pointer = data.as_ptr().add(byte_offset) as *const i32;
// The pointer is passed through `black_box` before the load — presumably to
// keep the optimizer from folding the load away in tests; confirm.
_mm512_loadu_si512(black_box(pointer))
}

Expand Down
32 changes: 16 additions & 16 deletions crates/core_arch/src/x86/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1185,9 +1185,9 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
///
/// ```text
/// let a0 = *p;
/// let a1 = *p.offset(1);
/// let a2 = *p.offset(2);
/// let a3 = *p.offset(3);
/// let a1 = *p.add(1);
/// let a2 = *p.add(2);
/// let a3 = *p.add(3);
/// __m128::new(a3, a2, a1, a0)
/// ```
///
Expand Down Expand Up @@ -1241,9 +1241,9 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
/// ```text
/// let x = a.extract(0);
/// *p = x;
/// *p.offset(1) = x;
/// *p.offset(2) = x;
/// *p.offset(3) = x;
/// *p.add(1) = x;
/// *p.add(2) = x;
/// *p.add(3) = x;
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps)
Expand Down Expand Up @@ -1317,9 +1317,9 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
///
/// ```text
/// *p = a.extract(3);
/// *p.offset(1) = a.extract(2);
/// *p.offset(2) = a.extract(1);
/// *p.offset(3) = a.extract(0);
/// *p.add(1) = a.extract(2);
/// *p.add(2) = a.extract(1);
/// *p.add(3) = a.extract(0);
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps)
Expand Down Expand Up @@ -3006,9 +3006,9 @@ mod tests {

let unalignment = (p as usize) & 0xf;
if unalignment != 0 {
let delta = ((16 - unalignment) >> 2) as isize;
let delta = (16 - unalignment) >> 2;
fixup = delta as f32;
p = p.offset(delta);
p = p.add(delta);
}

let r = _mm_load_ps(p);
Expand All @@ -3019,7 +3019,7 @@ mod tests {
#[simd_test(enable = "sse")]
unsafe fn test_mm_loadu_ps() {
let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let p = vals.as_ptr().offset(3);
let p = vals.as_ptr().add(3);
let r = _mm_loadu_ps(black_box(p));
assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
}
Expand All @@ -3036,9 +3036,9 @@ mod tests {

let unalignment = (p as usize) & 0xf;
if unalignment != 0 {
let delta = ((16 - unalignment) >> 2) as isize;
let delta = (16 - unalignment) >> 2;
fixup = delta as f32;
p = p.offset(delta);
p = p.add(delta);
}

let r = _mm_loadr_ps(p);
Expand All @@ -3057,7 +3057,7 @@ mod tests {
unsafe fn test_mm_store_ss() {
let mut vals = [0.0f32; 8];
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
_mm_store_ss(vals.as_mut_ptr().offset(1), a);
_mm_store_ss(vals.as_mut_ptr().add(1), a);

assert_eq!(vals[0], 0.0);
assert_eq!(vals[1], 1.0);
Expand Down Expand Up @@ -3152,7 +3152,7 @@ mod tests {
// Make sure p is **not** aligned to 16-byte boundary
if (p as usize) & 0xf == 0 {
ofs = 1;
p = p.offset(1);
p = p.add(1);
}

_mm_storeu_ps(p, *black_box(&a));
Expand Down
4 changes: 2 additions & 2 deletions crates/core_arch/src/x86/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4518,7 +4518,7 @@ mod tests {
// Make sure p is **not** aligned to 16-byte boundary
if (p as usize) & 0xf == 0 {
ofs = 1;
p = p.offset(1);
p = p.add(1);
}

_mm_storeu_pd(p, *black_box(&a));
Expand Down Expand Up @@ -4606,7 +4606,7 @@ mod tests {
let mut offset = 0;
if (d as usize) & 0xf == 0 {
offset = 1;
d = d.offset(offset as isize);
d = d.add(offset);
}

let r = _mm_loadu_pd(d);
Expand Down
27 changes: 12 additions & 15 deletions examples/hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a s
let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
let and4bits = _mm256_set1_epi8(0xf);

let mut i = 0_isize;
let mut i = 0_usize;
while src.len() >= 32 {
let invec = _mm256_loadu_si256(src.as_ptr() as *const _);

Expand All @@ -96,18 +96,17 @@ unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a s
let res2 = _mm256_unpackhi_epi8(masked2, masked1);

// Store everything into the right destination now
let base = dst.as_mut_ptr().offset(i * 2);
let base1 = base.offset(0) as *mut _;
let base2 = base.offset(16) as *mut _;
let base3 = base.offset(32) as *mut _;
let base4 = base.offset(48) as *mut _;
let base = dst.as_mut_ptr().add(i * 2);
let base1 = base.add(0) as *mut _;
let base2 = base.add(16) as *mut _;
let base3 = base.add(32) as *mut _;
let base4 = base.add(48) as *mut _;
_mm256_storeu2_m128i(base3, base1, res1);
_mm256_storeu2_m128i(base4, base2, res2);
src = &src[32..];
i += 32;
}

let i = i as usize;
let _ = hex_encode_sse41(src, &mut dst[i * 2..]);

Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
Expand All @@ -122,7 +121,7 @@ unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
let and4bits = _mm_set1_epi8(0xf);

let mut i = 0_isize;
let mut i = 0_usize;
while src.len() >= 16 {
let invec = _mm_loadu_si128(src.as_ptr() as *const _);

Expand All @@ -141,13 +140,12 @@ unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a
let res1 = _mm_unpacklo_epi8(masked2, masked1);
let res2 = _mm_unpackhi_epi8(masked2, masked1);

_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
_mm_storeu_si128(dst.as_mut_ptr().add(i * 2) as *mut _, res1);
_mm_storeu_si128(dst.as_mut_ptr().add(i * 2 + 16) as *mut _, res2);
src = &src[16..];
i += 16;
}

let i = i as usize;
let _ = hex_encode_fallback(src, &mut dst[i * 2..]);

Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
Expand All @@ -163,7 +161,7 @@ unsafe fn hex_encode_simd128<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'
let ascii_a = u8x16_splat(b'a' - 9 - 1);
let and4bits = u8x16_splat(0xf);

let mut i = 0_isize;
let mut i = 0_usize;
while src.len() >= 16 {
let invec = v128_load(src.as_ptr() as *const _);

Expand All @@ -189,13 +187,12 @@ unsafe fn hex_encode_simd128<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'
masked2, masked1,
);

v128_store(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
v128_store(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
v128_store(dst.as_mut_ptr().add(i * 2) as *mut _, res1);
v128_store(dst.as_mut_ptr().add(i * 2 + 16) as *mut _, res2);
src = &src[16..];
i += 16;
}

let i = i as usize;
let _ = hex_encode_fallback(src, &mut dst[i * 2..]);

Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
Expand Down