Skip to content

Commit d1b25a0

Browse files
committed
[CIR][CIRGen][Builtin][X86] Lower avx512 scatter intrinsics
1 parent 3edeb5d commit d1b25a0

File tree

3 files changed

+506
-2
lines changed

3 files changed

+506
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -837,8 +837,97 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
837837
case X86::BI__builtin_ia32_scattersiv4sf:
838838
case X86::BI__builtin_ia32_scattersiv4si:
839839
case X86::BI__builtin_ia32_scattersiv8sf:
840-
case X86::BI__builtin_ia32_scattersiv8si:
841-
llvm_unreachable("scattersiv8df NYI");
840+
case X86::BI__builtin_ia32_scattersiv8si: {
841+
llvm::StringRef intrinsicName;
842+
switch (BuiltinID) {
843+
default:
844+
llvm_unreachable("Unexpected builtin");
845+
case X86::BI__builtin_ia32_scattersiv8df:
846+
intrinsicName = "x86.avx512.mask.scatter.dpd.512";
847+
break;
848+
case X86::BI__builtin_ia32_scattersiv16sf:
849+
intrinsicName = "x86.avx512.mask.scatter.dps.512";
850+
break;
851+
case X86::BI__builtin_ia32_scatterdiv8df:
852+
intrinsicName = "x86.avx512.mask.scatter.qpd.512";
853+
break;
854+
case X86::BI__builtin_ia32_scatterdiv16sf:
855+
intrinsicName = "x86.avx512.mask.scatter.qps.512";
856+
break;
857+
case X86::BI__builtin_ia32_scattersiv8di:
858+
intrinsicName = "x86.avx512.mask.scatter.dpq.512";
859+
break;
860+
case X86::BI__builtin_ia32_scattersiv16si:
861+
intrinsicName = "x86.avx512.mask.scatter.dpi.512";
862+
break;
863+
case X86::BI__builtin_ia32_scatterdiv8di:
864+
intrinsicName = "x86.avx512.mask.scatter.qpq.512";
865+
break;
866+
case X86::BI__builtin_ia32_scatterdiv16si:
867+
intrinsicName = "x86.avx512.mask.scatter.qpi.512";
868+
break;
869+
case X86::BI__builtin_ia32_scatterdiv2df:
870+
intrinsicName = "x86.avx512.mask.scatterdiv2.df";
871+
break;
872+
case X86::BI__builtin_ia32_scatterdiv2di:
873+
intrinsicName = "x86.avx512.mask.scatterdiv2.di";
874+
break;
875+
case X86::BI__builtin_ia32_scatterdiv4df:
876+
intrinsicName = "x86.avx512.mask.scatterdiv4.df";
877+
break;
878+
case X86::BI__builtin_ia32_scatterdiv4di:
879+
intrinsicName = "x86.avx512.mask.scatterdiv4.di";
880+
break;
881+
case X86::BI__builtin_ia32_scatterdiv4sf:
882+
intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
883+
break;
884+
case X86::BI__builtin_ia32_scatterdiv4si:
885+
intrinsicName = "x86.avx512.mask.scatterdiv4.si";
886+
break;
887+
case X86::BI__builtin_ia32_scatterdiv8sf:
888+
intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
889+
break;
890+
case X86::BI__builtin_ia32_scatterdiv8si:
891+
intrinsicName = "x86.avx512.mask.scatterdiv8.si";
892+
break;
893+
case X86::BI__builtin_ia32_scattersiv2df:
894+
intrinsicName = "x86.avx512.mask.scattersiv2.df";
895+
break;
896+
case X86::BI__builtin_ia32_scattersiv2di:
897+
intrinsicName = "x86.avx512.mask.scattersiv2.di";
898+
break;
899+
case X86::BI__builtin_ia32_scattersiv4df:
900+
intrinsicName = "x86.avx512.mask.scattersiv4.df";
901+
break;
902+
case X86::BI__builtin_ia32_scattersiv4di:
903+
intrinsicName = "x86.avx512.mask.scattersiv4.di";
904+
break;
905+
case X86::BI__builtin_ia32_scattersiv4sf:
906+
intrinsicName = "x86.avx512.mask.scattersiv4.sf";
907+
break;
908+
case X86::BI__builtin_ia32_scattersiv4si:
909+
intrinsicName = "x86.avx512.mask.scattersiv4.si";
910+
break;
911+
case X86::BI__builtin_ia32_scattersiv8sf:
912+
intrinsicName = "x86.avx512.mask.scattersiv8.sf";
913+
break;
914+
case X86::BI__builtin_ia32_scattersiv8si:
915+
intrinsicName = "x86.avx512.mask.scattersiv8.si";
916+
break;
917+
}
918+
919+
unsigned minElts =
920+
std::min(cast<cir::VectorType>(Ops[2].getType()).getSize(),
921+
cast<cir::VectorType>(Ops[3].getType()).getSize());
922+
Ops[1] = getMaskVecValue(*this, Ops[1], minElts, getLoc(E->getExprLoc()));
923+
924+
return builder
925+
.create<cir::LLVMIntrinsicCallOp>(
926+
getLoc(E->getExprLoc()), builder.getStringAttr(intrinsicName.str()),
927+
builder.getVoidTy(), Ops)
928+
.getResult();
929+
}
930+
842931
case X86::BI__builtin_ia32_vextractf128_pd256:
843932
case X86::BI__builtin_ia32_vextractf128_ps256:
844933
case X86::BI__builtin_ia32_vextractf128_si256:

clang/test/CIR/CodeGen/X86/avx512f-builtins.c

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,3 +507,130 @@ __m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m51
507507
// LLVM: @llvm.x86.avx512.mask.gather.qpi.512
508508
return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
509509
}
510+
511+
512+
void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) {
513+
// CIR-LABEL: test_mm512_i32scatter_pd
514+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
515+
516+
// LLVM-LABEL: test_mm512_i32scatter_pd
517+
// LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
518+
return _mm512_i32scatter_pd(__addr, __index, __v1, 2);
519+
}
520+
521+
void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) {
522+
// CIR-LABEL: test_mm512_mask_i32scatter_pd
523+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
524+
525+
// LLVM-LABEL: test_mm512_mask_i32scatter_pd
526+
// LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
527+
return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2);
528+
}
529+
530+
void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) {
531+
// CIR-LABEL: test_mm512_i32scatter_ps
532+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
533+
534+
// LLVM-LABEL: test_mm512_i32scatter_ps
535+
// LLVM: @llvm.x86.avx512.mask.scatter.dps.512
536+
return _mm512_i32scatter_ps(__addr, __index, __v1, 2);
537+
}
538+
539+
void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) {
540+
// CIR-LABEL: test_mm512_mask_i32scatter_ps
541+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
542+
543+
// LLVM-LABEL: test_mm512_mask_i32scatter_ps
544+
// LLVM: @llvm.x86.avx512.mask.scatter.dps.512
545+
return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2);
546+
}
547+
548+
void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) {
549+
// CIR-LABEL: test_mm512_i64scatter_pd
550+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
551+
552+
// LLVM-LABEL: test_mm512_i64scatter_pd
553+
// LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
554+
return _mm512_i64scatter_pd(__addr, __index, __v1, 2);
555+
}
556+
557+
void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
558+
// CIR-LABEL: test_mm512_mask_i64scatter_pd
559+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
560+
561+
// LLVM-LABEL: test_mm512_mask_i64scatter_pd
562+
// LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
563+
return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2);
564+
}
565+
566+
void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) {
567+
// CIR-LABEL: test_mm512_i64scatter_ps
568+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
569+
570+
// LLVM-LABEL: test_mm512_i64scatter_ps
571+
// LLVM: @llvm.x86.avx512.mask.scatter.qps.512
572+
return _mm512_i64scatter_ps(__addr, __index, __v1, 2);
573+
}
574+
575+
void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) {
576+
// CIR-LABEL: test_mm512_mask_i64scatter_ps
577+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
578+
579+
// LLVM-LABEL: test_mm512_mask_i64scatter_ps
580+
// LLVM: @llvm.x86.avx512.mask.scatter.qps.512
581+
return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2);
582+
}
583+
584+
void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) {
585+
// CIR-LABEL: test_mm512_i32scatter_epi32
586+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
587+
588+
// LLVM-LABEL: test_mm512_i32scatter_epi32
589+
// LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
590+
return _mm512_i32scatter_epi32(__addr, __index, __v1, 2);
591+
}
592+
593+
void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) {
594+
// CIR-LABEL: test_mm512_mask_i32scatter_epi32
595+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
596+
597+
// LLVM-LABEL: test_mm512_mask_i32scatter_epi32
598+
// LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
599+
return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2);
600+
}
601+
602+
void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
603+
// CIR-LABEL: test_mm512_i64scatter_epi64
604+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
605+
606+
// LLVM-LABEL: test_mm512_i64scatter_epi64
607+
// LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
608+
return _mm512_i64scatter_epi64(__addr, __index, __v1, 2);
609+
}
610+
611+
void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
612+
// CIR-LABEL: test_mm512_mask_i64scatter_epi64
613+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
614+
615+
// LLVM-LABEL: test_mm512_mask_i64scatter_epi64
616+
// LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
617+
return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2);
618+
}
619+
620+
void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) {
621+
// CIR-LABEL: test_mm512_i64scatter_epi32
622+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
623+
624+
// LLVM-LABEL: test_mm512_i64scatter_epi32
625+
// LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
626+
return _mm512_i64scatter_epi32(__addr, __index, __v1, 2);
627+
}
628+
629+
void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) {
630+
// CIR-LABEL: test_mm512_mask_i64scatter_epi32
631+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
632+
633+
// LLVM-LABEL: test_mm512_mask_i64scatter_epi32
634+
// LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
635+
return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2);
636+
}

0 commit comments

Comments
 (0)