Skip to content

Commit a371069

Browse files
author
Daniel Smith
authored
Fix or equals integer comparisons (#872)
1 parent f46a47f commit a371069

File tree

2 files changed

+56
-50
lines changed

2 files changed

+56
-50
lines changed

crates/core_arch/src/x86/avx512f.rs

+36-30
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,7 @@ pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i)
10141014
#[target_feature(enable = "avx512f")]
10151015
#[cfg_attr(test, assert_instr(vpcmp))]
10161016
pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
1017-
_mm512_cmpgt_epu32_mask(b, a)
1017+
simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
10181018
}
10191019

10201020
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
@@ -1025,7 +1025,7 @@ pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
10251025
#[target_feature(enable = "avx512f")]
10261026
#[cfg_attr(test, assert_instr(vpcmp))]
10271027
pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
1028-
_mm512_cmpgt_epu32_mask(b, a) & m
1028+
_mm512_cmple_epu32_mask(a, b) & m
10291029
}
10301030

10311031
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
@@ -1035,7 +1035,7 @@ pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i)
10351035
#[target_feature(enable = "avx512f")]
10361036
#[cfg_attr(test, assert_instr(vpcmp))]
10371037
pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
1038-
_mm512_cmplt_epu32_mask(b, a)
1038+
simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
10391039
}
10401040

10411041
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
@@ -1046,7 +1046,7 @@ pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
10461046
#[target_feature(enable = "avx512f")]
10471047
#[cfg_attr(test, assert_instr(vpcmp))]
10481048
pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
1049-
_mm512_cmplt_epu32_mask(b, a) & m
1049+
_mm512_cmpge_epu32_mask(a, b) & m
10501050
}
10511051

10521052
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector.
@@ -1181,7 +1181,7 @@ pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i)
11811181
#[target_feature(enable = "avx512f")]
11821182
#[cfg_attr(test, assert_instr(vpcmp))]
11831183
pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
1184-
_mm512_cmpgt_epi32_mask(b, a)
1184+
simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
11851185
}
11861186

11871187
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
@@ -1192,7 +1192,7 @@ pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11921192
#[target_feature(enable = "avx512f")]
11931193
#[cfg_attr(test, assert_instr(vpcmp))]
11941194
pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
1195-
_mm512_cmpgt_epi32_mask(b, a) & m
1195+
_mm512_cmple_epi32_mask(a, b) & m
11961196
}
11971197

11981198
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
@@ -1202,7 +1202,7 @@ pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i)
12021202
#[target_feature(enable = "avx512f")]
12031203
#[cfg_attr(test, assert_instr(vpcmp))]
12041204
pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
1205-
_mm512_cmplt_epi32_mask(b, a)
1205+
simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
12061206
}
12071207

12081208
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
@@ -1213,7 +1213,7 @@ pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
12131213
#[target_feature(enable = "avx512f")]
12141214
#[cfg_attr(test, assert_instr(vpcmp))]
12151215
pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
1216-
_mm512_cmplt_epi32_mask(b, a) & m
1216+
_mm512_cmpge_epi32_mask(a, b) & m
12171217
}
12181218

12191219
/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
@@ -1348,7 +1348,7 @@ pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i)
13481348
#[target_feature(enable = "avx512f")]
13491349
#[cfg_attr(test, assert_instr(vpcmp))]
13501350
pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
1351-
_mm512_cmpgt_epu64_mask(b, a)
1351+
simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
13521352
}
13531353

13541354
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
@@ -1359,7 +1359,7 @@ pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
13591359
#[target_feature(enable = "avx512f")]
13601360
#[cfg_attr(test, assert_instr(vpcmp))]
13611361
pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
1362-
_mm512_cmpgt_epu64_mask(b, a) & m
1362+
_mm512_cmple_epu64_mask(a, b) & m
13631363
}
13641364

13651365
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
@@ -1369,7 +1369,7 @@ pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i)
13691369
#[target_feature(enable = "avx512f")]
13701370
#[cfg_attr(test, assert_instr(vpcmp))]
13711371
pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
1372-
_mm512_cmplt_epu64_mask(b, a)
1372+
simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
13731373
}
13741374

13751375
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
@@ -1380,7 +1380,7 @@ pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
13801380
#[target_feature(enable = "avx512f")]
13811381
#[cfg_attr(test, assert_instr(vpcmp))]
13821382
pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
1383-
_mm512_cmplt_epu64_mask(b, a) & m
1383+
_mm512_cmpge_epu64_mask(a, b) & m
13841384
}
13851385

13861386
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
@@ -1515,7 +1515,7 @@ pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i)
15151515
#[target_feature(enable = "avx512f")]
15161516
#[cfg_attr(test, assert_instr(vpcmp))]
15171517
pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
1518-
_mm512_cmpgt_epi64_mask(b, a)
1518+
simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
15191519
}
15201520

15211521
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
@@ -1526,7 +1526,7 @@ pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
15261526
#[target_feature(enable = "avx512f")]
15271527
#[cfg_attr(test, assert_instr(vpcmp))]
15281528
pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
1529-
_mm512_cmpgt_epi64_mask(b, a) & m
1529+
_mm512_cmple_epi64_mask(a, b) & m
15301530
}
15311531

15321532
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
@@ -1536,7 +1536,7 @@ pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i)
15361536
#[target_feature(enable = "avx512f")]
15371537
#[cfg_attr(test, assert_instr(vpcmp))]
15381538
pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
1539-
_mm512_cmplt_epi64_mask(b, a)
1539+
simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
15401540
}
15411541

15421542
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
@@ -1547,7 +1547,7 @@ pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
15471547
#[target_feature(enable = "avx512f")]
15481548
#[cfg_attr(test, assert_instr(vpcmp))]
15491549
pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
1550-
_mm512_cmplt_epi64_mask(b, a) & m
1550+
_mm512_cmpge_epi64_mask(a, b) & m
15511551
}
15521552

15531553
/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
@@ -1987,7 +1987,10 @@ mod tests {
19871987
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
19881988
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
19891989
let b = _mm512_set1_epi32(-1);
1990-
assert_eq!(_mm512_cmple_epu32_mask(a, b), _mm512_cmpgt_epu32_mask(b, a))
1990+
assert_eq!(
1991+
_mm512_cmple_epu32_mask(a, b),
1992+
!_mm512_cmpgt_epu32_mask(a, b)
1993+
)
19911994
}
19921995

19931996
#[simd_test(enable = "avx512f")]
@@ -1999,7 +2002,7 @@ mod tests {
19992002
let mask = 0b01111010_01111010;
20002003
assert_eq!(
20012004
_mm512_mask_cmple_epu32_mask(mask, a, b),
2002-
_mm512_mask_cmpgt_epu32_mask(mask, b, a)
2005+
0b01111010_01111010
20032006
);
20042007
}
20052008

@@ -2009,7 +2012,10 @@ mod tests {
20092012
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
20102013
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
20112014
let b = _mm512_set1_epi32(-1);
2012-
assert_eq!(_mm512_cmpge_epu32_mask(a, b), _mm512_cmplt_epu32_mask(b, a))
2015+
assert_eq!(
2016+
_mm512_cmpge_epu32_mask(a, b),
2017+
!_mm512_cmplt_epu32_mask(a, b)
2018+
)
20132019
}
20142020

20152021
#[simd_test(enable = "avx512f")]
@@ -2019,10 +2025,7 @@ mod tests {
20192025
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
20202026
let b = _mm512_set1_epi32(-1);
20212027
let mask = 0b01111010_01111010;
2022-
assert_eq!(
2023-
_mm512_mask_cmpge_epu32_mask(mask, a, b),
2024-
_mm512_mask_cmplt_epu32_mask(mask, b, a)
2025-
);
2028+
assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
20262029
}
20272030

20282031
#[simd_test(enable = "avx512f")]
@@ -2144,7 +2147,10 @@ mod tests {
21442147
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
21452148
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
21462149
let b = _mm512_set1_epi32(-1);
2147-
assert_eq!(_mm512_cmple_epi32_mask(a, b), _mm512_cmpgt_epi32_mask(b, a))
2150+
assert_eq!(
2151+
_mm512_cmple_epi32_mask(a, b),
2152+
!_mm512_cmpgt_epi32_mask(a, b)
2153+
)
21482154
}
21492155

21502156
#[simd_test(enable = "avx512f")]
@@ -2154,10 +2160,7 @@ mod tests {
21542160
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
21552161
let b = _mm512_set1_epi32(-1);
21562162
let mask = 0b01111010_01111010;
2157-
assert_eq!(
2158-
_mm512_mask_cmple_epi32_mask(mask, a, b),
2159-
_mm512_mask_cmpgt_epi32_mask(mask, b, a)
2160-
);
2163+
assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
21612164
}
21622165

21632166
#[simd_test(enable = "avx512f")]
@@ -2166,7 +2169,10 @@ mod tests {
21662169
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
21672170
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
21682171
let b = _mm512_set1_epi32(-1);
2169-
assert_eq!(_mm512_cmpge_epi32_mask(a, b), _mm512_cmplt_epi32_mask(b, a))
2172+
assert_eq!(
2173+
_mm512_cmpge_epi32_mask(a, b),
2174+
!_mm512_cmplt_epi32_mask(a, b)
2175+
)
21702176
}
21712177

21722178
#[simd_test(enable = "avx512f")]
@@ -2178,7 +2184,7 @@ mod tests {
21782184
let mask = 0b01111010_01111010;
21792185
assert_eq!(
21802186
_mm512_mask_cmpge_epi32_mask(mask, a, b),
2181-
_mm512_mask_cmplt_epi32_mask(mask, b, a)
2187+
0b01111010_01111010
21822188
);
21832189
}
21842190

crates/core_arch/src/x86_64/avx512f.rs

+20-20
Original file line numberDiff line numberDiff line change
@@ -141,36 +141,36 @@ mod tests {
141141
unsafe fn test_mm512_cmple_epu64_mask() {
142142
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
143143
let b = _mm512_set1_epi64(-1);
144-
assert_eq!(_mm512_cmple_epu64_mask(a, b), _mm512_cmpgt_epu64_mask(b, a))
144+
assert_eq!(
145+
_mm512_cmple_epu64_mask(a, b),
146+
!_mm512_cmpgt_epu64_mask(a, b)
147+
)
145148
}
146149

147150
#[simd_test(enable = "avx512f")]
148151
unsafe fn test_mm512_mask_cmple_epu64_mask() {
149152
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
150153
let b = _mm512_set1_epi64(-1);
151154
let mask = 0b01111010;
152-
assert_eq!(
153-
_mm512_mask_cmple_epu64_mask(mask, a, b),
154-
_mm512_mask_cmpgt_epu64_mask(mask, b, a)
155-
);
155+
assert_eq!(_mm512_mask_cmple_epu64_mask(mask, a, b), 0b01111010);
156156
}
157157

158158
#[simd_test(enable = "avx512f")]
159159
unsafe fn test_mm512_cmpge_epu64_mask() {
160160
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
161161
let b = _mm512_set1_epi64(-1);
162-
assert_eq!(_mm512_cmpge_epu64_mask(a, b), _mm512_cmplt_epu64_mask(b, a))
162+
assert_eq!(
163+
_mm512_cmpge_epu64_mask(a, b),
164+
!_mm512_cmplt_epu64_mask(a, b)
165+
);
163166
}
164167

165168
#[simd_test(enable = "avx512f")]
166169
unsafe fn test_mm512_mask_cmpge_epu64_mask() {
167170
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
168171
let b = _mm512_set1_epi64(-1);
169172
let mask = 0b01111010;
170-
assert_eq!(
171-
_mm512_mask_cmpge_epu64_mask(mask, a, b),
172-
_mm512_mask_cmplt_epu64_mask(mask, b, a)
173-
);
173+
assert_eq!(_mm512_mask_cmpge_epu64_mask(mask, a, b), 0b00110000);
174174
}
175175

176176
#[simd_test(enable = "avx512f")]
@@ -262,36 +262,36 @@ mod tests {
262262
unsafe fn test_mm512_cmple_epi64_mask() {
263263
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
264264
let b = _mm512_set1_epi64(-1);
265-
assert_eq!(_mm512_cmple_epi64_mask(a, b), _mm512_cmpgt_epi64_mask(b, a))
265+
assert_eq!(
266+
_mm512_cmple_epi64_mask(a, b),
267+
!_mm512_cmpgt_epi64_mask(a, b)
268+
)
266269
}
267270

268271
#[simd_test(enable = "avx512f")]
269272
unsafe fn test_mm512_mask_cmple_epi64_mask() {
270273
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
271274
let b = _mm512_set1_epi64(-1);
272275
let mask = 0b01111010;
273-
assert_eq!(
274-
_mm512_mask_cmple_epi64_mask(mask, a, b),
275-
_mm512_mask_cmpgt_epi64_mask(mask, b, a)
276-
);
276+
assert_eq!(_mm512_mask_cmple_epi64_mask(mask, a, b), 0b00110000);
277277
}
278278

279279
#[simd_test(enable = "avx512f")]
280280
unsafe fn test_mm512_cmpge_epi64_mask() {
281281
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
282282
let b = _mm512_set1_epi64(-1);
283-
assert_eq!(_mm512_cmpge_epi64_mask(a, b), _mm512_cmplt_epi64_mask(b, a))
283+
assert_eq!(
284+
_mm512_cmpge_epi64_mask(a, b),
285+
!_mm512_cmplt_epi64_mask(a, b)
286+
)
284287
}
285288

286289
#[simd_test(enable = "avx512f")]
287290
unsafe fn test_mm512_mask_cmpge_epi64_mask() {
288291
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
289292
let b = _mm512_set1_epi64(-1);
290293
let mask = 0b01111010;
291-
assert_eq!(
292-
_mm512_mask_cmpge_epi64_mask(mask, a, b),
293-
_mm512_mask_cmplt_epi64_mask(mask, b, a)
294-
);
294+
assert_eq!(_mm512_mask_cmpge_epi64_mask(mask, a, b), 0b01111010);
295295
}
296296

297297
#[simd_test(enable = "avx512f")]

0 commit comments

Comments
 (0)