44
44
#include " mlir/IR/Matchers.h"
45
45
#include " mlir/Pass/Pass.h"
46
46
#include " mlir/Pass/PassManager.h"
47
+ #include " mlir/Target/LLVMIR/Import.h"
47
48
#include " mlir/Target/LLVMIR/ModuleTranslation.h"
48
49
#include " llvm/ADT/ArrayRef.h"
49
50
#include " llvm/ADT/TypeSwitch.h"
@@ -61,14 +62,40 @@ namespace fir {
// TODO: This should really be recovered from the specified target.
static constexpr unsigned defaultAlign = 8;
// Default (generic) address space, used whenever the module's DataLayout
// does not specify an alloca/program memory space for the target.
static constexpr unsigned defaultAddressSpace = 0u;

/// `fir.box` attribute values as defined for CFI_attribute_t in
/// flang/ISO_Fortran_binding.h.
static constexpr unsigned kAttrPointer = CFI_attribute_pointer;
static constexpr unsigned kAttrAllocatable = CFI_attribute_allocatable;
70
- static inline mlir::Type getLlvmPtrType (mlir::MLIRContext *context) {
71
- return mlir::LLVM::LLVMPointerType::get (context);
72
+ static inline unsigned
73
+ getAllocaAddressSpace (mlir::ConversionPatternRewriter &rewriter) {
74
+ mlir::Operation *parentOp = rewriter.getInsertionBlock ()->getParentOp ();
75
+ assert (parentOp != nullptr &&
76
+ " expected insertion block to have parent operation" );
77
+ if (auto module = parentOp->getParentOfType <mlir::ModuleOp>())
78
+ if (mlir::Attribute addrSpace =
79
+ mlir::DataLayout (module ).getAllocaMemorySpace ())
80
+ return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt ();
81
+ return defaultAddressSpace;
82
+ }
83
+
84
+ static inline unsigned
85
+ getProgramAddressSpace (mlir::ConversionPatternRewriter &rewriter) {
86
+ mlir::Operation *parentOp = rewriter.getInsertionBlock ()->getParentOp ();
87
+ assert (parentOp != nullptr &&
88
+ " expected insertion block to have parent operation" );
89
+ if (auto module = parentOp->getParentOfType <mlir::ModuleOp>())
90
+ if (mlir::Attribute addrSpace =
91
+ mlir::DataLayout (module ).getProgramMemorySpace ())
92
+ return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt ();
93
+ return defaultAddressSpace;
94
+ }
95
+
96
+ static inline mlir::Type getLlvmPtrType (mlir::MLIRContext *context,
97
+ unsigned addressSpace = 0 ) {
98
+ return mlir::LLVM::LLVMPointerType::get (context, addressSpace);
72
99
}
73
100
74
101
static inline mlir::Type getI8Type (mlir::MLIRContext *context) {
@@ -368,19 +395,37 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern<FromOp> {
368
395
return getBlockForAllocaInsert (op->getParentOp ());
369
396
}
370
397
371
- // Generate an alloca of size 1 for an object of type \p llvmObjectTy.
372
- mlir::LLVM::AllocaOp
373
- genAllocaWithType (mlir::Location loc, mlir::Type llvmObjectTy,
374
- unsigned alignment,
375
- mlir::ConversionPatternRewriter &rewriter) const {
398
+ // Generate an alloca of size 1 for an object of type \p llvmObjectTy in the
399
+ // allocation address space provided for the architecture in the DataLayout
400
+ // specification. If the address space is different from the devices
401
+ // program address space we perform a cast. In the case of most architectures
402
+ // the program and allocation address space will be the default of 0 and no
403
+ // cast will be emitted.
404
+ mlir::Value genAllocaAndAddrCastWithType (
405
+ mlir::Location loc, mlir::Type llvmObjectTy, unsigned alignment,
406
+ mlir::ConversionPatternRewriter &rewriter) const {
376
407
auto thisPt = rewriter.saveInsertionPoint ();
377
408
mlir::Operation *parentOp = rewriter.getInsertionBlock ()->getParentOp ();
378
409
mlir::Block *insertBlock = getBlockForAllocaInsert (parentOp);
379
410
rewriter.setInsertionPointToStart (insertBlock);
380
411
auto size = genI32Constant (loc, rewriter, 1 );
381
- mlir::Type llvmPtrTy = ::getLlvmPtrType (llvmObjectTy.getContext ());
382
- auto al = rewriter.create <mlir::LLVM::AllocaOp>(
383
- loc, llvmPtrTy, llvmObjectTy, size, alignment);
412
+ unsigned allocaAs = getAllocaAddressSpace (rewriter);
413
+ unsigned programAs = getProgramAddressSpace (rewriter);
414
+
415
+ mlir::Value al = rewriter.create <mlir::LLVM::AllocaOp>(
416
+ loc, ::getLlvmPtrType (llvmObjectTy.getContext (), allocaAs),
417
+ llvmObjectTy, size, alignment);
418
+
419
+ // if our allocation address space, is not the same as the program address
420
+ // space, then we must emit a cast to the program address space before use.
421
+ // An example case would be on AMDGPU, where the allocation address space is
422
+ // the numeric value 5 (private), and the program address space is 0
423
+ // (generic).
424
+ if (allocaAs != programAs) {
425
+ al = rewriter.create <mlir::LLVM::AddrSpaceCastOp>(
426
+ loc, ::getLlvmPtrType (llvmObjectTy.getContext (), programAs), al);
427
+ }
428
+
384
429
rewriter.restoreInsertionPoint (thisPt);
385
430
return al;
386
431
}
@@ -532,20 +577,34 @@ struct AllocaOpConversion : public FIROpConversion<fir::AllocaOp> {
532
577
size = rewriter.create <mlir::LLVM::MulOp>(
533
578
loc, ity, size, integerCast (loc, rewriter, ity, operands[i]));
534
579
}
535
- mlir::Type llvmPtrTy = ::getLlvmPtrType (alloc.getContext ());
580
+
581
+ unsigned allocaAs = getAllocaAddressSpace (rewriter);
582
+ unsigned programAs = getProgramAddressSpace (rewriter);
583
+
536
584
// NOTE: we used to pass alloc->getAttrs() in the builder for non opaque
537
585
// pointers! Only propagate pinned and bindc_name to help debugging, but
538
586
// this should have no functional purpose (and passing the operand segment
539
587
// attribute like before is certainly bad).
540
588
auto llvmAlloc = rewriter.create <mlir::LLVM::AllocaOp>(
541
- loc, llvmPtrTy, llvmObjectType, size);
589
+ loc, ::getLlvmPtrType (alloc.getContext (), allocaAs), llvmObjectType,
590
+ size);
542
591
if (alloc.getPinned ())
543
592
llvmAlloc->setDiscardableAttr (alloc.getPinnedAttrName (),
544
593
alloc.getPinnedAttr ());
545
594
if (alloc.getBindcName ())
546
595
llvmAlloc->setDiscardableAttr (alloc.getBindcNameAttrName (),
547
596
alloc.getBindcNameAttr ());
548
- rewriter.replaceOp (alloc, llvmAlloc);
597
+ if (allocaAs == programAs) {
598
+ rewriter.replaceOp (alloc, llvmAlloc);
599
+ } else {
600
+ // if our allocation address space, is not the same as the program address
601
+ // space, then we must emit a cast to the program address space before
602
+ // use. An example case would be on AMDGPU, where the allocation address
603
+ // space is the numeric value 5 (private), and the program address space
604
+ // is 0 (generic).
605
+ rewriter.replaceOpWithNewOp <mlir::LLVM::AddrSpaceCastOp>(
606
+ alloc, ::getLlvmPtrType (alloc.getContext (), programAs), llvmAlloc);
607
+ }
549
608
return mlir::success ();
550
609
}
551
610
};
@@ -1691,8 +1750,8 @@ struct EmboxCommonConversion : public FIROpConversion<OP> {
1691
1750
if (isInGlobalOp (rewriter))
1692
1751
return boxValue;
1693
1752
mlir::Type llvmBoxTy = boxValue.getType ();
1694
- auto alloca =
1695
- this -> genAllocaWithType (loc, llvmBoxTy, defaultAlign, rewriter);
1753
+ auto alloca = this -> genAllocaAndAddrCastWithType (loc, llvmBoxTy,
1754
+ defaultAlign, rewriter);
1696
1755
auto storeOp = rewriter.create <mlir::LLVM::StoreOp>(loc, boxValue, alloca);
1697
1756
this ->attachTBAATag (storeOp, boxTy, boxTy, nullptr );
1698
1757
return alloca;
@@ -3110,11 +3169,11 @@ struct LoadOpConversion : public FIROpConversion<fir::LoadOp> {
3110
3169
else
3111
3170
attachTBAATag (boxValue, boxTy, boxTy, nullptr );
3112
3171
auto newBoxStorage =
3113
- genAllocaWithType (loc, llvmLoadTy, defaultAlign, rewriter);
3172
+ genAllocaAndAddrCastWithType (loc, llvmLoadTy, defaultAlign, rewriter);
3114
3173
auto storeOp =
3115
3174
rewriter.create <mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
3116
3175
attachTBAATag (storeOp, boxTy, boxTy, nullptr );
3117
- rewriter.replaceOp (load, newBoxStorage. getResult () );
3176
+ rewriter.replaceOp (load, newBoxStorage);
3118
3177
} else {
3119
3178
auto loadOp = rewriter.create <mlir::LLVM::LoadOp>(
3120
3179
load.getLoc (), llvmLoadTy, adaptor.getOperands (), load->getAttrs ());
@@ -3808,6 +3867,11 @@ class FIRToLLVMLowering
3808
3867
if (!forcedTargetTriple.empty ())
3809
3868
fir::setTargetTriple (mod, forcedTargetTriple);
3810
3869
3870
+ if (!forcedDataLayout.empty ()) {
3871
+ llvm::DataLayout dl (forcedDataLayout);
3872
+ fir::support::setMLIRDataLayout (mod, dl);
3873
+ }
3874
+
3811
3875
// Run dynamic pass pipeline for converting Math dialect
3812
3876
// operations into other dialects (llvm, func, etc.).
3813
3877
// Some conversions of Math operations cannot be done
0 commit comments