@@ -34,6 +34,27 @@ func.func @transpose_load_to_rocdl_16xi4(%idx1 : index, %idx2 : index, %wgmem :
34
34
// Test case: amdgpu.transpose_load reading a vector<3xi32> from a
// memref<128x32xi32, 3> at two dynamic indices.
//   * Default check prefix: the converted output must contain
//     rocdl.ds.read.tr6.b96.
//   * OLD check prefix: the tool output must contain the
//     "Non-gfx950 chipset not supported" diagnostic instead.
// NOTE(review): the RUN lines that select each prefix are outside this hunk;
// presumably the OLD prefix is driven by a non-gfx950 chipset option -- confirm.
// CHECK-LABEL: func @transpose_load_to_rocdl_3xi32
func.func @transpose_load_to_rocdl_3xi32(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi32, 3>) -> vector<3xi32> {
  // CHECK: rocdl.ds.read.tr6.b96
  // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
  %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi32, 3> -> vector<3xi32>
  return %0 : vector<3xi32>
}
41
+
42
+ // -----
43
+
44
// Test case: amdgpu.transpose_load reading a vector<16xi4> from a memref whose
// element type (i8) differs from the result element type (i4).
//   * Default check prefix: the converted output must contain
//     rocdl.ds.read.tr4.b64.
//   * OLD check prefix: the tool output must contain the
//     "Non-gfx950 chipset not supported" diagnostic instead.
// NOTE(review): the function name ends in "memrefxi1" but the memref operand
// is memref<128x32xi8, 3>; the suffix looks like it should be "memrefxi8".
// Name left unchanged here -- confirm and rename together with the label.
// CHECK-LABEL: func @transpose_load_to_rocdl_i4_memrefxi1
func.func @transpose_load_to_rocdl_i4_memrefxi1(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi8, 3>) -> vector<16xi4> {
  // CHECK: rocdl.ds.read.tr4.b64
  // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
  %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi8, 3> -> vector<16xi4>
  return %0 : vector<16xi4>
}
51
+
52
+ // -----
53
+
54
// Test case: amdgpu.transpose_load reading a vector<3xi32> from a
// memref<128x32xi8, 3>, i.e. the memref element type (i8) differs from the
// result element type (i32).
//   * Default check prefix: the converted output must contain
//     rocdl.ds.read.tr6.b96.
//   * OLD check prefix: the tool output must contain the
//     "Non-gfx950 chipset not supported" diagnostic instead.
// NOTE(review): the function name ends in "memrefxi1" but the memref operand
// is memref<128x32xi8, 3>; the suffix looks like it should be "memrefxi8".
// Name left unchanged here -- confirm and rename together with the label.
// CHECK-LABEL: func @transpose_load_to_rocdl_i6_memrefxi1
func.func @transpose_load_to_rocdl_i6_memrefxi1(%idx1 : index, %idx2 : index, %wgmem : memref<128x32xi8, 3>) -> vector<3xi32> {
  // CHECK: rocdl.ds.read.tr6.b96
  // CHECK-OLD: error: 'amdgpu.transpose_load' op Non-gfx950 chipset not supported
  %0 = amdgpu.transpose_load %wgmem[%idx1, %idx2] : memref<128x32xi8, 3> -> vector<3xi32>
  return %0 : vector<3xi32>
}
0 commit comments