|
2 | 2 | ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx | FileCheck %s --check-prefixes=X86,X86-MMX
|
3 | 3 | ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE
|
4 | 4 | ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X86,X86-SSE
|
5 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64 |
6 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefix=X64 |
7 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx | FileCheck %s --check-prefix=X64 |
8 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2 | FileCheck %s --check-prefix=X64 |
9 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefix=X64 |
| 5 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE2 |
| 6 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSSE3 |
| 7 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx | FileCheck %s --check-prefixes=X64,X64-AVX |
| 8 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX |
| 9 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX |
10 | 10 |
|
11 | 11 | declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>)
|
12 | 12 |
|
@@ -536,6 +536,71 @@ define void @build_v8i8_0123zzzu(ptr%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
|
536 | 536 | ; X86-SSE-NEXT: paddd %mm0, %mm0
|
537 | 537 | ; X86-SSE-NEXT: movq %mm0, (%eax)
|
538 | 538 | ; X86-SSE-NEXT: retl
|
| 539 | +; |
| 540 | +; X64-SSE2-LABEL: build_v8i8_0123zzzu: |
| 541 | +; X64-SSE2: # %bb.0: |
| 542 | +; X64-SSE2-NEXT: movd %esi, %xmm0 |
| 543 | +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] |
| 544 | +; X64-SSE2-NEXT: pand %xmm1, %xmm2 |
| 545 | +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 |
| 546 | +; X64-SSE2-NEXT: por %xmm2, %xmm1 |
| 547 | +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] |
| 548 | +; X64-SSE2-NEXT: pand %xmm0, %xmm1 |
| 549 | +; X64-SSE2-NEXT: movd %edx, %xmm2 |
| 550 | +; X64-SSE2-NEXT: psllw $8, %xmm2 |
| 551 | +; X64-SSE2-NEXT: pandn %xmm2, %xmm0 |
| 552 | +; X64-SSE2-NEXT: por %xmm1, %xmm0 |
| 553 | +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] |
| 554 | +; X64-SSE2-NEXT: pand %xmm1, %xmm0 |
| 555 | +; X64-SSE2-NEXT: movd %ecx, %xmm2 |
| 556 | +; X64-SSE2-NEXT: pslld $16, %xmm2 |
| 557 | +; X64-SSE2-NEXT: pandn %xmm2, %xmm1 |
| 558 | +; X64-SSE2-NEXT: por %xmm0, %xmm1 |
| 559 | +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] |
| 560 | +; X64-SSE2-NEXT: pand %xmm0, %xmm1 |
| 561 | +; X64-SSE2-NEXT: movd %r8d, %xmm2 |
| 562 | +; X64-SSE2-NEXT: pslld $24, %xmm2 |
| 563 | +; X64-SSE2-NEXT: pandn %xmm2, %xmm0 |
| 564 | +; X64-SSE2-NEXT: por %xmm1, %xmm0 |
| 565 | +; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| 566 | +; X64-SSE2-NEXT: movdq2q %xmm0, %mm0 |
| 567 | +; X64-SSE2-NEXT: paddd %mm0, %mm0 |
| 568 | +; X64-SSE2-NEXT: movq %mm0, (%rdi) |
| 569 | +; X64-SSE2-NEXT: retq |
| 570 | +; |
| 571 | +; X64-SSSE3-LABEL: build_v8i8_0123zzzu: |
| 572 | +; X64-SSSE3: # %bb.0: |
| 573 | +; X64-SSSE3-NEXT: movd %esi, %xmm0 |
| 574 | +; X64-SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm0[0,1,2,3] |
| 575 | +; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[u,u,u,u,u,7,u,u,u,u,u,u,u,u] |
| 576 | +; X64-SSSE3-NEXT: movd %edx, %xmm1 |
| 577 | +; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[0,u,u,u,u,u],zero,xmm1[u,u,u,u,u,u,u,u] |
| 578 | +; X64-SSSE3-NEXT: por %xmm0, %xmm1 |
| 579 | +; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[u,u,u,u,7,u,u,u,u,u,u,u,u] |
| 580 | +; X64-SSSE3-NEXT: movd %ecx, %xmm0 |
| 581 | +; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[0,u,u,u,u],zero,xmm0[u,u,u,u,u,u,u,u] |
| 582 | +; X64-SSSE3-NEXT: por %xmm1, %xmm0 |
| 583 | +; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[u,u,u,7,u,u,u,u,u,u,u,u] |
| 584 | +; X64-SSSE3-NEXT: movd %r8d, %xmm1 |
| 585 | +; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[0,u,u,u],zero,xmm1[u,u,u,u,u,u,u,u] |
| 586 | +; X64-SSSE3-NEXT: por %xmm0, %xmm1 |
| 587 | +; X64-SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| 588 | +; X64-SSSE3-NEXT: movdq2q %xmm1, %mm0 |
| 589 | +; X64-SSSE3-NEXT: paddd %mm0, %mm0 |
| 590 | +; X64-SSSE3-NEXT: movq %mm0, (%rdi) |
| 591 | +; X64-SSSE3-NEXT: retq |
| 592 | +; |
| 593 | +; X64-AVX-LABEL: build_v8i8_0123zzzu: |
| 594 | +; X64-AVX: # %bb.0: |
| 595 | +; X64-AVX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 |
| 596 | +; X64-AVX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 |
| 597 | +; X64-AVX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 |
| 598 | +; X64-AVX-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0 |
| 599 | +; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| 600 | +; X64-AVX-NEXT: movdq2q %xmm0, %mm0 |
| 601 | +; X64-AVX-NEXT: paddd %mm0, %mm0 |
| 602 | +; X64-AVX-NEXT: movq %mm0, (%rdi) |
| 603 | +; X64-AVX-NEXT: retq |
539 | 604 | %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
|
540 | 605 | %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
|
541 | 606 | %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
|
@@ -608,6 +673,36 @@ define void @build_v8i8_0zzzzzzu(ptr%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
|
608 | 673 | ; X86-SSE-NEXT: paddd %mm0, %mm0
|
609 | 674 | ; X86-SSE-NEXT: movq %mm0, (%eax)
|
610 | 675 | ; X86-SSE-NEXT: retl
|
| 676 | +; |
| 677 | +; X64-SSE2-LABEL: build_v8i8_0zzzzzzu: |
| 678 | +; X64-SSE2: # %bb.0: |
| 679 | +; X64-SSE2-NEXT: movd %esi, %xmm0 |
| 680 | +; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| 681 | +; X64-SSE2-NEXT: por %xmm0, %xmm1 |
| 682 | +; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| 683 | +; X64-SSE2-NEXT: movdq2q %xmm1, %mm0 |
| 684 | +; X64-SSE2-NEXT: paddd %mm0, %mm0 |
| 685 | +; X64-SSE2-NEXT: movq %mm0, (%rdi) |
| 686 | +; X64-SSE2-NEXT: retq |
| 687 | +; |
| 688 | +; X64-SSSE3-LABEL: build_v8i8_0zzzzzzu: |
| 689 | +; X64-SSSE3: # %bb.0: |
| 690 | +; X64-SSSE3-NEXT: movd %esi, %xmm0 |
| 691 | +; X64-SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0] |
| 692 | +; X64-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,zero |
| 693 | +; X64-SSSE3-NEXT: movdq2q %xmm0, %mm0 |
| 694 | +; X64-SSSE3-NEXT: paddd %mm0, %mm0 |
| 695 | +; X64-SSSE3-NEXT: movq %mm0, (%rdi) |
| 696 | +; X64-SSSE3-NEXT: retq |
| 697 | +; |
| 698 | +; X64-AVX-LABEL: build_v8i8_0zzzzzzu: |
| 699 | +; X64-AVX: # %bb.0: |
| 700 | +; X64-AVX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 |
| 701 | +; X64-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| 702 | +; X64-AVX-NEXT: movdq2q %xmm0, %mm0 |
| 703 | +; X64-AVX-NEXT: paddd %mm0, %mm0 |
| 704 | +; X64-AVX-NEXT: movq %mm0, (%rdi) |
| 705 | +; X64-AVX-NEXT: retq |
611 | 706 | %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
|
612 | 707 | %2 = insertelement <8 x i8> %1, i8 0, i32 1
|
613 | 708 | %3 = insertelement <8 x i8> %2, i8 0, i32 2
|
|
0 commit comments