Skip to content

Commit abeb6c9

Browse files
authored
[Flang][MLIR] Add basic initial support for alloca and program address space handling in FIR->LLVMIR codegen (#77518)
This is a slightly more slimmed-down and up-to-date version of the older PR from here: https://reviews.llvm.org/D144203, written by @jsjodin, which has already undergone some review. This PR places allocas in the alloca address space specified by the provided data layout (default is 0 for all address spaces, unless explicitly specified by the layout), and then casts these allocas to the program address space if this address space is different from the allocation address space. For most architectures' data layouts, this will be a no-op, as they have a flat address space. But in the case of AMDGPU it will result in allocas being placed in the correct address space (5, private), and then cast into the correct program address space (0, generic). This results in correct (partially; a follow-up PR will be forthcoming soon) generation of allocations inside of device code. This PR is in addition to the work by @skatrak in this PR: #69599 and adds separate and necessary functionality of casting allocas from their address space to the program address space. Both are independent PRs, although there is some minor overlap, e.g. this PR incorporates some of the useful helper functions from #69599, so whichever lands first will need a minor rebase. Co-author: jsjodin
1 parent 509f634 commit abeb6c9

File tree

3 files changed

+157
-49
lines changed

3 files changed

+157
-49
lines changed

flang/include/flang/Optimizer/CodeGen/CGPasses.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def FIRToLLVMLowering : Pass<"fir-to-llvm-ir", "mlir::ModuleOp"> {
2727
let options = [
2828
Option<"forcedTargetTriple", "target", "std::string", /*default=*/"",
2929
"Override module's target triple.">,
30+
Option<"forcedDataLayout", "datalayout", "std::string", /*default=*/"",
31+
"Override module's data layout.">,
3032
Option<"applyTBAA", "apply-tbaa", "bool", /*default=*/"false",
3133
"Attach TBAA tags to memory accessing operations.">
3234
];

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 81 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "mlir/IR/Matchers.h"
4545
#include "mlir/Pass/Pass.h"
4646
#include "mlir/Pass/PassManager.h"
47+
#include "mlir/Target/LLVMIR/Import.h"
4748
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
4849
#include "llvm/ADT/ArrayRef.h"
4950
#include "llvm/ADT/TypeSwitch.h"
@@ -61,14 +62,40 @@ namespace fir {
6162

6263
// TODO: This should really be recovered from the specified target.
6364
static constexpr unsigned defaultAlign = 8;
65+
static constexpr unsigned defaultAddressSpace = 0u;
6466

6567
/// `fir.box` attribute values as defined for CFI_attribute_t in
6668
/// flang/ISO_Fortran_binding.h.
6769
static constexpr unsigned kAttrPointer = CFI_attribute_pointer;
6870
static constexpr unsigned kAttrAllocatable = CFI_attribute_allocatable;
6971

70-
static inline mlir::Type getLlvmPtrType(mlir::MLIRContext *context) {
71-
return mlir::LLVM::LLVMPointerType::get(context);
72+
static inline unsigned
73+
getAllocaAddressSpace(mlir::ConversionPatternRewriter &rewriter) {
74+
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
75+
assert(parentOp != nullptr &&
76+
"expected insertion block to have parent operation");
77+
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
78+
if (mlir::Attribute addrSpace =
79+
mlir::DataLayout(module).getAllocaMemorySpace())
80+
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
81+
return defaultAddressSpace;
82+
}
83+
84+
static inline unsigned
85+
getProgramAddressSpace(mlir::ConversionPatternRewriter &rewriter) {
86+
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
87+
assert(parentOp != nullptr &&
88+
"expected insertion block to have parent operation");
89+
if (auto module = parentOp->getParentOfType<mlir::ModuleOp>())
90+
if (mlir::Attribute addrSpace =
91+
mlir::DataLayout(module).getProgramMemorySpace())
92+
return llvm::cast<mlir::IntegerAttr>(addrSpace).getUInt();
93+
return defaultAddressSpace;
94+
}
95+
96+
static inline mlir::Type getLlvmPtrType(mlir::MLIRContext *context,
97+
unsigned addressSpace = 0) {
98+
return mlir::LLVM::LLVMPointerType::get(context, addressSpace);
7299
}
73100

74101
static inline mlir::Type getI8Type(mlir::MLIRContext *context) {
@@ -368,19 +395,37 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern<FromOp> {
368395
return getBlockForAllocaInsert(op->getParentOp());
369396
}
370397

371-
// Generate an alloca of size 1 for an object of type \p llvmObjectTy.
372-
mlir::LLVM::AllocaOp
373-
genAllocaWithType(mlir::Location loc, mlir::Type llvmObjectTy,
374-
unsigned alignment,
375-
mlir::ConversionPatternRewriter &rewriter) const {
398+
// Generate an alloca of size 1 for an object of type \p llvmObjectTy in the
399+
// allocation address space provided for the architecture in the DataLayout
400+
// specification. If the address space is different from the device's
401+
// program address space we perform a cast. In the case of most architectures
402+
// the program and allocation address space will be the default of 0 and no
403+
// cast will be emitted.
404+
mlir::Value genAllocaAndAddrCastWithType(
405+
mlir::Location loc, mlir::Type llvmObjectTy, unsigned alignment,
406+
mlir::ConversionPatternRewriter &rewriter) const {
376407
auto thisPt = rewriter.saveInsertionPoint();
377408
mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp();
378409
mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp);
379410
rewriter.setInsertionPointToStart(insertBlock);
380411
auto size = genI32Constant(loc, rewriter, 1);
381-
mlir::Type llvmPtrTy = ::getLlvmPtrType(llvmObjectTy.getContext());
382-
auto al = rewriter.create<mlir::LLVM::AllocaOp>(
383-
loc, llvmPtrTy, llvmObjectTy, size, alignment);
412+
unsigned allocaAs = getAllocaAddressSpace(rewriter);
413+
unsigned programAs = getProgramAddressSpace(rewriter);
414+
415+
mlir::Value al = rewriter.create<mlir::LLVM::AllocaOp>(
416+
loc, ::getLlvmPtrType(llvmObjectTy.getContext(), allocaAs),
417+
llvmObjectTy, size, alignment);
418+
419+
// If our allocation address space is not the same as the program address
420+
// space, then we must emit a cast to the program address space before use.
421+
// An example case would be on AMDGPU, where the allocation address space is
422+
// the numeric value 5 (private), and the program address space is 0
423+
// (generic).
424+
if (allocaAs != programAs) {
425+
al = rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
426+
loc, ::getLlvmPtrType(llvmObjectTy.getContext(), programAs), al);
427+
}
428+
384429
rewriter.restoreInsertionPoint(thisPt);
385430
return al;
386431
}
@@ -532,20 +577,34 @@ struct AllocaOpConversion : public FIROpConversion<fir::AllocaOp> {
532577
size = rewriter.create<mlir::LLVM::MulOp>(
533578
loc, ity, size, integerCast(loc, rewriter, ity, operands[i]));
534579
}
535-
mlir::Type llvmPtrTy = ::getLlvmPtrType(alloc.getContext());
580+
581+
unsigned allocaAs = getAllocaAddressSpace(rewriter);
582+
unsigned programAs = getProgramAddressSpace(rewriter);
583+
536584
// NOTE: we used to pass alloc->getAttrs() in the builder for non opaque
537585
// pointers! Only propagate pinned and bindc_name to help debugging, but
538586
// this should have no functional purpose (and passing the operand segment
539587
// attribute like before is certainly bad).
540588
auto llvmAlloc = rewriter.create<mlir::LLVM::AllocaOp>(
541-
loc, llvmPtrTy, llvmObjectType, size);
589+
loc, ::getLlvmPtrType(alloc.getContext(), allocaAs), llvmObjectType,
590+
size);
542591
if (alloc.getPinned())
543592
llvmAlloc->setDiscardableAttr(alloc.getPinnedAttrName(),
544593
alloc.getPinnedAttr());
545594
if (alloc.getBindcName())
546595
llvmAlloc->setDiscardableAttr(alloc.getBindcNameAttrName(),
547596
alloc.getBindcNameAttr());
548-
rewriter.replaceOp(alloc, llvmAlloc);
597+
if (allocaAs == programAs) {
598+
rewriter.replaceOp(alloc, llvmAlloc);
599+
} else {
600+
// If our allocation address space is not the same as the program address
601+
// space, then we must emit a cast to the program address space before
602+
// use. An example case would be on AMDGPU, where the allocation address
603+
// space is the numeric value 5 (private), and the program address space
604+
// is 0 (generic).
605+
rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(
606+
alloc, ::getLlvmPtrType(alloc.getContext(), programAs), llvmAlloc);
607+
}
549608
return mlir::success();
550609
}
551610
};
@@ -1691,8 +1750,8 @@ struct EmboxCommonConversion : public FIROpConversion<OP> {
16911750
if (isInGlobalOp(rewriter))
16921751
return boxValue;
16931752
mlir::Type llvmBoxTy = boxValue.getType();
1694-
auto alloca =
1695-
this->genAllocaWithType(loc, llvmBoxTy, defaultAlign, rewriter);
1753+
auto alloca = this->genAllocaAndAddrCastWithType(loc, llvmBoxTy,
1754+
defaultAlign, rewriter);
16961755
auto storeOp = rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, alloca);
16971756
this->attachTBAATag(storeOp, boxTy, boxTy, nullptr);
16981757
return alloca;
@@ -3110,11 +3169,11 @@ struct LoadOpConversion : public FIROpConversion<fir::LoadOp> {
31103169
else
31113170
attachTBAATag(boxValue, boxTy, boxTy, nullptr);
31123171
auto newBoxStorage =
3113-
genAllocaWithType(loc, llvmLoadTy, defaultAlign, rewriter);
3172+
genAllocaAndAddrCastWithType(loc, llvmLoadTy, defaultAlign, rewriter);
31143173
auto storeOp =
31153174
rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
31163175
attachTBAATag(storeOp, boxTy, boxTy, nullptr);
3117-
rewriter.replaceOp(load, newBoxStorage.getResult());
3176+
rewriter.replaceOp(load, newBoxStorage);
31183177
} else {
31193178
auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
31203179
load.getLoc(), llvmLoadTy, adaptor.getOperands(), load->getAttrs());
@@ -3808,6 +3867,11 @@ class FIRToLLVMLowering
38083867
if (!forcedTargetTriple.empty())
38093868
fir::setTargetTriple(mod, forcedTargetTriple);
38103869

3870+
if (!forcedDataLayout.empty()) {
3871+
llvm::DataLayout dl(forcedDataLayout);
3872+
fir::support::setMLIRDataLayout(mod, dl);
3873+
}
3874+
38113875
// Run dynamic pass pipeline for converting Math dialect
38123876
// operations into other dialects (llvm, func, etc.).
38133877
// Some conversions of Math operations cannot be done

0 commit comments

Comments
 (0)