Skip to content

Commit adc6228

Browse files
authored
[mlir][xegpu] Refine layout assignment in XeGPU SIMT distribution. (#142687)
Changes: * Decouple layout propagation from subgroup distribution and move it to an independent pass. * Refine layout assignment to handle control-flow ops correctly (scf.for, scf.while). * Refine test cases.
1 parent 0d21c95 commit adc6228

File tree

9 files changed

+1726
-1727
lines changed

9 files changed

+1726
-1727
lines changed

mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,23 @@ def XeGPUSubgroupDistribute : Pass<"xegpu-subgroup-distribute"> {
2727
}];
2828
let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect",
2929
"vector::VectorDialect"];
30+
}
31+
32+
def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> {
33+
let summary = "Propagate and assign XeGPU layout information";
34+
let description = [{
35+
This pass propagates the XeGPU layout information across ops. Starting
36+
from a set of anchor operations (e.g. `dpas`, `store_nd`), this will
37+
propagate the layouts required for their operands to the producers. With
38+
this propagated layout information, the pass will then update op result types
39+
with the layout information.
40+
}];
41+
let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect",
42+
"vector::VectorDialect"];
3043
let options = [Option<
31-
"printOnly", "print-analysis-only", "bool",
32-
/*default=*/"false",
33-
"Print the result of the subgroup map propagation analysis and exit.">];
44+
"printOnly", "print-analysis-only", "bool",
45+
/*default=*/"false",
46+
"Print the result of layout propagation analysis and exit.">];
3447
}
3548

3649
def XeGPUWgToSgDistribute : Pass<"xegpu-wg-to-sg-distribute"> {

mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,20 @@ class LayoutAttr;
2424
class TensorDescType;
2525
} // namespace xegpu
2626

27+
namespace xegpu {
28+
/// HW dependent constants.
29+
/// TODO: These constants should be queried from the target information.
30+
namespace targetinfo {
31+
constexpr unsigned subgroupSize = 16; // How many lanes in a subgroup.
32+
/// If DPAS A or B operands have low precision element types they must be packed
33+
/// according to the following sizes.
34+
constexpr unsigned packedSizeInBitsForDefault =
35+
16; // Minimum packing size per register for DPAS A.
36+
constexpr unsigned packedSizeInBitsForDpasB =
37+
32; // Minimum packing size per register for DPAS B.
38+
} // namespace targetinfo
39+
} // namespace xegpu
40+
2741
namespace xegpu {
2842

2943
/// Flatten a set of ValueRange into a single SmallVector<Value>

mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_mlir_dialect_library(MLIRXeGPUTransforms
44
XeGPUSubgroupDistribute.cpp
55
XeGPUUnroll.cpp
66
XeGPUWgToSgDistribute.cpp
7+
XeGPUPropagateLayout.cpp
78

89
ADDITIONAL_HEADER_DIRS
910
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/XeGPU

0 commit comments

Comments
 (0)