@@ -115,21 +115,49 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
 ///   %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
 ///   %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
 bool RISCVTargetLowering::lowerInterleavedLoad(
-    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
     ArrayRef<unsigned> Indices, unsigned Factor) const {
   assert(Indices.size() == Shuffles.size());
 
-  IRBuilder<> Builder(LI);
-
-  const DataLayout &DL = LI->getDataLayout();
+  IRBuilder<> Builder(Load);
 
+  const DataLayout &DL = Load->getDataLayout();
   auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
-  if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
-                                    LI->getPointerAddressSpace(), DL))
-    return false;
+  auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
+
+  Value *Ptr, *VL;
+  Align Alignment;
+  if (auto *LI = dyn_cast<LoadInst>(Load)) {
+    assert(LI->isSimple());
+    Ptr = LI->getPointerOperand();
+    Alignment = LI->getAlign();
+    assert(!Mask && "Unexpected mask on a load\n");
+    Mask = Builder.getAllOnesMask(VTy->getElementCount());
+    VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
+  } else {
+    auto *VPLoad = cast<VPIntrinsic>(Load);
+    assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load &&
+           "Unexpected intrinsic");
+    Ptr = VPLoad->getMemoryPointerParam();
+    Alignment = VPLoad->getPointerAlignment().value_or(
+        DL.getABITypeAlign(VTy->getElementType()));
 
-  auto *PtrTy = LI->getPointerOperandType();
-  auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
+    assert(Mask && "vp.load needs a mask!");
+
+    Value *WideEVL = VPLoad->getVectorLengthParam();
+    // Conservatively check if EVL is a multiple of factor, otherwise some
+    // (trailing) elements might be lost after the transformation.
+    if (!isMultipleOfN(WideEVL, DL, Factor))
+      return false;
+
+    auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+    VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+  }
+
+  Type *PtrTy = Ptr->getType();
+  unsigned AS = PtrTy->getPointerAddressSpace();
+  if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+    return false;
 
   // If the segment load is going to be performed segment at a time anyways
   // and there's only one element used, use a strided load instead.  This
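To make the unified entry point concrete, here is a minimal IR sketch of the vp.load case it now accepts alongside plain loads (all value names are illustrative, and the fixed-vector segment-load intrinsic mangling is an assumption based on the FixedVlsegIntrIds table, not copied from this commit): the EVL must be provably a multiple of the factor, is exact-divided by it, and is widened to XLen.

    ; before: factor-2 masked vp.load feeding even/odd shuffles
    %l = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %m, i32 %wide.evl)
    ; after: EVL / 2, zext to XLen, masked segment load
    %evl = udiv exact i32 %wide.evl, 2
    %vl = zext i32 %evl to i64
    %sl = call { <4 x i32>, <4 x i32> } @llvm.riscv.seg2.load.mask.v4i32.i64(ptr %ptr, <4 x i1> %segmask, i64 %vl)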
@@ -138,26 +166,23 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
     unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
     Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
-    Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
-    Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
-    Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
-                                           VTy->getElementCount());
-
+    Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+    // Note: Same VL as above, but i32 not xlen due to signature of
+    // vp.strided.load
+    VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+                                    VTy->getElementCount());
     CallInst *CI =
         Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
                                 {VTy, BasePtr->getType(), Stride->getType()},
                                 {BasePtr, Stride, Mask, VL});
-    CI->addParamAttr(
-        0, Attribute::getWithAlignment(CI->getContext(), LI->getAlign()));
+    CI->addParamAttr(0,
+                     Attribute::getWithAlignment(CI->getContext(), Alignment));
     Shuffles[0]->replaceAllUsesWith(CI);
     return true;
   };
 
-  Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
-  Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
   CallInst *VlsegN = Builder.CreateIntrinsic(
-      FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy},
-      {LI->getPointerOperand(), Mask, VL});
+      FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL});
 
   for (unsigned i = 0; i < Shuffles.size(); i++) {
     Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
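As a sketch of the single-shuffle strided-load fast path above, assume Factor = 2, i32 elements, and Indices[0] == 1: the base pointer is advanced by Indices[0] * 4 bytes, the stride is Factor * 4 bytes, and, per the note in the diff, the VL is i32 to match the @llvm.experimental.vp.strided.load signature (value names are illustrative).

    %base = getelementptr i8, ptr %ptr, i64 4
    %odd = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 4 %base, i64 8, <4 x i1> %mask, i32 4)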
@@ -426,122 +451,6 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
   return true;
 }
 
-/// Lower an interleaved vp.load into a vlsegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.load (Factor = 2):
-///   %l = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %ptr,
-///                                                         %mask,
-///                                                         i32 %wide.rvl)
-///   %dl = tail call { <vscale x 32 x i8>, <vscale x 32 x i8> }
-///             @llvm.vector.deinterleave2.nxv64i8(
-///               <vscale x 64 x i8> %l)
-///   %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 0
-///   %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 1
-///
-/// Into:
-///   %rvl = udiv %wide.rvl, 2
-///   %sl = call { <vscale x 32 x i8>, <vscale x 32 x i8> }
-///             @llvm.riscv.vlseg2.mask.nxv32i8.i64(<vscale x 32 x i8> undef,
-///                                                 <vscale x 32 x i8> undef,
-///                                                 ptr %ptr,
-///                                                 %mask,
-///                                                 i64 %rvl,
-///                                                 i64 1)
-///   %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 0
-///   %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 1
-///
-/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be
-/// removed by the caller
-/// TODO: We probably can loosen the dependency on matching extractvalue when
-/// dealing with factor of 2 (extractvalue is still required for most of other
-/// factors though).
-bool RISCVTargetLowering::lowerInterleavedVPLoad(
-    VPIntrinsic *Load, Value *Mask,
-    ArrayRef<Value *> DeinterleaveResults) const {
-  const unsigned Factor = DeinterleaveResults.size();
-  assert(Mask && "Expect a valid mask");
-  assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
-         "Unexpected intrinsic");
-
-  Value *FirstActive = *llvm::find_if(DeinterleaveResults,
-                                      [](Value *V) { return V != nullptr; });
-  VectorType *VTy = cast<VectorType>(FirstActive->getType());
-
-  auto &DL = Load->getModule()->getDataLayout();
-  Align Alignment = Load->getParamAlign(0).value_or(
-      DL.getABITypeAlign(VTy->getElementType()));
-  if (!isLegalInterleavedAccessType(
-          VTy, Factor, Alignment,
-          Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL))
-    return false;
-
-  IRBuilder<> Builder(Load);
-
-  Value *WideEVL = Load->getVectorLengthParam();
-  // Conservatively check if EVL is a multiple of factor, otherwise some
-  // (trailing) elements might be lost after the transformation.
-  if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
-    return false;
-
-  auto *PtrTy = Load->getArgOperand(0)->getType();
-  auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
-  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-  Value *EVL =
-      Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-
-  Value *Return = nullptr;
-  if (isa<FixedVectorType>(VTy)) {
-    Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
-                                     {VTy, PtrTy, XLenTy},
-                                     {Load->getArgOperand(0), Mask, EVL});
-  } else {
-    unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
-    unsigned NumElts = VTy->getElementCount().getKnownMinValue();
-    Type *VecTupTy = TargetExtType::get(
-        Load->getContext(), "riscv.vector.tuple",
-        ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
-                                NumElts * SEW / 8),
-        Factor);
-
-    Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
-        Load->getModule(), ScalableVlsegIntrIds[Factor - 2],
-        {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
-    Value *Operands[] = {
-        PoisonValue::get(VecTupTy),
-        Load->getArgOperand(0),
-        Mask,
-        EVL,
-        ConstantInt::get(XLenTy,
-                         RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC),
-        ConstantInt::get(XLenTy, Log2_64(SEW))};
-
-    CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands);
-
-    SmallVector<Type *, 8> AggrTypes{Factor, VTy};
-    Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
-    Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration(
-        Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy});
-    for (unsigned i = 0; i < Factor; ++i) {
-      Value *VecExtract =
-          Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)});
-      Return = Builder.CreateInsertValue(Return, VecExtract, i);
-    }
-  }
-
-  for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
-    if (!DIO)
-      continue;
-    // We have to create a brand new ExtractValue to replace each
-    // of these old ExtractValue instructions.
-    Value *NewEV =
-        Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
-    DIO->replaceAllUsesWith(NewEV);
-  }
-
-  return true;
-}
-
 /// Lower an interleaved vp.store into a vssegN intrinsic.
 ///
 /// E.g. Lower an interleaved vp.store (Factor = 2):