Skip to content

Commit 225768d

Browse files
committed
[X86] combineConcatVectorOps - add tests showing v4i64 shift-by-32 with unnecessary concatenation
On AVX1-only targets, we concatenate v4i64 SHL/SRL shifts by 32 bits as a shuffle, but this is only worthwhile if the shift source value is free to concatenate.
1 parent 3ff69c8 commit 225768d

File tree

2 files changed

+134
-0
lines changed

2 files changed

+134
-0
lines changed

llvm/test/CodeGen/X86/vector-shift-lshr-256.ll

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1971,6 +1971,73 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
19711971
ret <4 x i64> %shift
19721972
}
19731973

1974+
; Logical-shift-right-by-32 test whose <4 x i64> source is built by joining the
; two <2 x i64> arguments %lo and %hi with a shufflevector.
; The AVX1-level prefixes (AVX1, XOPAVX1, X86-AVX1) expect a vinsertf128 of
; %xmm1 into %ymm0 followed by two vshufps that use a register cleared by
; vxorps; the AVX2/AVX512-level prefixes expect vinserti128 plus one vpsrlq $32.
define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind {
1975+
; AVX1-LABEL: shift32_v4i64_concat:
1976+
; AVX1: # %bb.0:
1977+
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1978+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1979+
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1980+
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1981+
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1982+
; AVX1-NEXT: retq
1983+
;
1984+
; AVX2-LABEL: shift32_v4i64_concat:
1985+
; AVX2: # %bb.0:
1986+
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1987+
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1988+
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
1989+
; AVX2-NEXT: retq
1990+
;
1991+
; XOPAVX1-LABEL: shift32_v4i64_concat:
1992+
; XOPAVX1: # %bb.0:
1993+
; XOPAVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1994+
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1995+
; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1996+
; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
1997+
; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1998+
; XOPAVX1-NEXT: retq
1999+
;
2000+
; XOPAVX2-LABEL: shift32_v4i64_concat:
2001+
; XOPAVX2: # %bb.0:
2002+
; XOPAVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2003+
; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2004+
; XOPAVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
2005+
; XOPAVX2-NEXT: retq
2006+
;
2007+
; AVX512-LABEL: shift32_v4i64_concat:
2008+
; AVX512: # %bb.0:
2009+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2010+
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2011+
; AVX512-NEXT: vpsrlq $32, %ymm0, %ymm0
2012+
; AVX512-NEXT: retq
2013+
;
2014+
; AVX512VL-LABEL: shift32_v4i64_concat:
2015+
; AVX512VL: # %bb.0:
2016+
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2017+
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2018+
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
2019+
; AVX512VL-NEXT: retq
2020+
;
2021+
; X86-AVX1-LABEL: shift32_v4i64_concat:
2022+
; X86-AVX1: # %bb.0:
2023+
; X86-AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2024+
; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2025+
; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
2026+
; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
2027+
; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
2028+
; X86-AVX1-NEXT: retl
2029+
;
2030+
; X86-AVX2-LABEL: shift32_v4i64_concat:
2031+
; X86-AVX2: # %bb.0:
2032+
; X86-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
2033+
; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
2034+
; X86-AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
2035+
; X86-AVX2-NEXT: retl
2036+
%a = shufflevector <2 x i64> %lo, <2 x i64> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2037+
%shift = lshr <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
2038+
ret <4 x i64> %shift
2039+
}
2040+
19742041
define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
19752042
; AVX1-LABEL: sh_trunc_sh_vec:
19762043
; AVX1: # %bb.0:

llvm/test/CodeGen/X86/vector-shift-shl-256.ll

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1823,3 +1823,70 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
18231823
%shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
18241824
ret <4 x i64> %shift
18251825
}
1826+
1827+
; Shift-left-by-32 test whose <4 x i64> source is built by joining the two
; <2 x i64> arguments %lo and %hi with a shufflevector.
; The AVX1-level prefixes (AVX1, XOPAVX1, X86-AVX1) expect a vinsertf128 of
; %xmm1 into %ymm0 followed by two vshufps that use a register cleared by
; vxorps; the AVX2/AVX512-level prefixes expect vinserti128 plus one vpsllq $32.
define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind {
1828+
; AVX1-LABEL: shift32_v4i64_concat:
1829+
; AVX1: # %bb.0:
1830+
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1831+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1832+
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1833+
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
1834+
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1835+
; AVX1-NEXT: retq
1836+
;
1837+
; AVX2-LABEL: shift32_v4i64_concat:
1838+
; AVX2: # %bb.0:
1839+
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1840+
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1841+
; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
1842+
; AVX2-NEXT: retq
1843+
;
1844+
; XOPAVX1-LABEL: shift32_v4i64_concat:
1845+
; XOPAVX1: # %bb.0:
1846+
; XOPAVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1847+
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1848+
; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1849+
; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
1850+
; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1851+
; XOPAVX1-NEXT: retq
1852+
;
1853+
; XOPAVX2-LABEL: shift32_v4i64_concat:
1854+
; XOPAVX2: # %bb.0:
1855+
; XOPAVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1856+
; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1857+
; XOPAVX2-NEXT: vpsllq $32, %ymm0, %ymm0
1858+
; XOPAVX2-NEXT: retq
1859+
;
1860+
; AVX512-LABEL: shift32_v4i64_concat:
1861+
; AVX512: # %bb.0:
1862+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1863+
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1864+
; AVX512-NEXT: vpsllq $32, %ymm0, %ymm0
1865+
; AVX512-NEXT: retq
1866+
;
1867+
; AVX512VL-LABEL: shift32_v4i64_concat:
1868+
; AVX512VL: # %bb.0:
1869+
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1870+
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1871+
; AVX512VL-NEXT: vpsllq $32, %ymm0, %ymm0
1872+
; AVX512VL-NEXT: retq
1873+
;
1874+
; X86-AVX1-LABEL: shift32_v4i64_concat:
1875+
; X86-AVX1: # %bb.0:
1876+
; X86-AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1877+
; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1878+
; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
1879+
; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
1880+
; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
1881+
; X86-AVX1-NEXT: retl
1882+
;
1883+
; X86-AVX2-LABEL: shift32_v4i64_concat:
1884+
; X86-AVX2: # %bb.0:
1885+
; X86-AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1886+
; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1887+
; X86-AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
1888+
; X86-AVX2-NEXT: retl
1889+
%a = shufflevector <2 x i64> %lo, <2 x i64> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1890+
%shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
1891+
ret <4 x i64> %shift
1892+
}

0 commit comments

Comments
 (0)