Skip to content

Commit 60e2c56

Browse files
committed
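Adding loads from a different value type: the verifier now computes the element size from the result vector's element type instead of the source memref's element type.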
1 parent bbb57ea commit 60e2c56

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -534,7 +534,7 @@ LogicalResult TransposeLoadOp::verify() {
534534
if (!transferType)
535535
return emitOpError("destination type must be a vector type");
536536
size_t numElements = transferType.getNumElements();
537-
size_t elementTypeSize = srcType.getElementType().getIntOrFloatBitWidth();
537+
size_t elementTypeSize = transferType.getElementType().getIntOrFloatBitWidth();
538538

539539
// ElementSize -> NumElements
540540
const std::map<size_t, size_t> KValidLoadSizeMap = {

mlir/test/Conversion/AMDGPUToROCDL/transpose_load.mlir

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,27 @@ func.func @transpose_load_to_rocdl_16xi4(%idx1 : index, %idx2 : index, %wgmem :
3434
// CHECK-LABEL: func @transpose_load_to_rocdl_3xi32
3535
func.func @transpose_load_to_rocdl_3xi32(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi32, 3>) -> vector<3xi32> {
3636
// CHECK: rocdl.ds.read.tr6.b96
37+
// CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
3738
%0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi32, 3> -> vector<3xi32>
3839
return %0 : vector<3xi32>
3940
}
41+
42+
// -----
43+
44+
// CHECK-LABEL: func @transpose_load_to_rocdl_i4_memrefxi1
45+
func.func @transpose_load_to_rocdl_i4_memrefxi1(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi8, 3>) -> vector<16xi4> {
46+
// CHECK: rocdl.ds.read.tr4.b64
47+
// CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
48+
%0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi8, 3> -> vector<16xi4>
49+
return %0 : vector<16xi4>
50+
}
51+
52+
// -----
53+
54+
// CHECK-LABEL: func @transpose_load_to_rocdl_i6_memrefxi1
55+
func.func @transpose_load_to_rocdl_i6_memrefxi1(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi8, 3>) -> vector<3xi32> {
56+
// CHECK: rocdl.ds.read.tr6.b96
57+
// CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
58+
%0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi8, 3> -> vector<3xi32>
59+
return %0 : vector<3xi32>
60+
}

0 commit comments

Comments
 (0)