Skip to content

Commit ebbe69b

Browse files
committed
a new pass for linalg_matmul
1 parent 54c068e commit ebbe69b

File tree

6 files changed

+400
-1
lines changed

6 files changed

+400
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// RUN: buddy-opt %s \
2+
// RUN: -matmul-parallel-vectorization \
3+
// RUN: -convert-linalg-to-affine-loops \
4+
// RUN: -lower-affine \
5+
// RUN: -convert-vector-to-scf \
6+
// RUN: -convert-scf-to-cf \
7+
// RUN: -convert-vector-to-llvm \
8+
// RUN: -convert-math-to-llvm \
9+
// RUN: -convert-math-to-libm \
10+
// RUN: -convert-arith-to-llvm \
11+
// RUN: -convert-func-to-llvm \
12+
// RUN: -expand-strided-metadata \
13+
// RUN: -finalize-memref-to-llvm \
14+
// RUN: -reconcile-unrealized-casts \
15+
// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
16+
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
17+
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
18+
// RUN: | FileCheck %s
19+
20+
func.func private @printMemrefF32(memref<*xf32>)
21+
22+
// Kernel under test: a single linalg.matmul over dynamically shaped 2-D f32
// memrefs, with %a and %b as inputs and %c as the output operand.
// The op is left in its plain form so that the pass pipeline listed at the
// top of this file has something to rewrite.
// NOTE(review): linalg.matmul accumulates into its output operand, so the
// caller is expected to pre-initialize %c (@main fills it with 0.0).
func.func @test(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>) {
23+
linalg.matmul
24+
ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
25+
outs(%c: memref<?x?xf32>)
26+
return
27+
}
28+
29+
// Helper: allocates a %arg0 x %arg1 f32 memref and stores %arg4 into every
// element, then returns the memref.
//   %arg0 - number of rows (outer loop bound)
//   %arg1 - number of columns (inner loop bound)
//   %arg4 - fill value
// The buffer comes from memref.alloc; this function does not free it.
func.func @alloc_f32(%arg0: index, %arg1: index, %arg4: f32) -> memref<?x?xf32> {
30+
%c0 = arith.constant 0 : index
31+
%c1 = arith.constant 1 : index
32+
%0 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
33+
// Visit every (row, column) index pair and write the fill value.
scf.for %idx0 = %c0 to %arg0 step %c1 {
34+
scf.for %idx1 = %c0 to %arg1 step %c1 {
35+
memref.store %arg4, %0[%idx0, %idx1] : memref<?x?xf32>
36+
}
37+
}
38+
return %0 : memref<?x?xf32>
39+
}
40+
41+
// Entry point: runs @test on two problem sizes and prints each result so the
// output can be matched against the expected-output comments below.
// NOTE(review): none of the buffers returned by @alloc_f32 is deallocated
// in this function.
func.func @main(){
42+
%c32 = arith.constant 32 : index
43+
%c1024 = arith.constant 1024 : index
44+
%c3 = arith.constant 3 : index
45+
%f0 = arith.constant 0.0 : f32
46+
%f1 = arith.constant 1.0 : f32
47+
48+
// Case 1: (32 x 1024 of ones) * (1024 x 32 of ones) into a zeroed 32 x 32
// output, so every printed element is expected to be 1024.
%m0 = call @alloc_f32(%c32,%c1024, %f1) : (index, index, f32) -> memref<?x?xf32>
49+
%m1 = call @alloc_f32(%c1024,%c32, %f1) : (index, index, f32) -> memref<?x?xf32>
50+
%m2 = call @alloc_f32(%c32,%c32, %f0) : (index, index, f32) -> memref<?x?xf32>
51+
52+
call @test(%m0, %m1, %m2) : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) -> ()
53+
54+
// printMemrefF32 takes an unranked memref, hence the cast.
%printed_m2 = memref.cast %m2 : memref<?x?xf32> to memref<*xf32>
55+
56+
// CHECK: Unranked Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [32, 32] strides = [32, 1] data =
57+
// CHECK-NEXT: [
58+
// CHECK: [1024{{(, 1024)*}}]
59+
call @printMemrefF32(%printed_m2) : (memref<*xf32>) -> ()
60+
61+
// Case 2: 3 x 3 inputs of ones into a zeroed 3 x 3 output, so every printed
// element is expected to be 3.
%m3 = call @alloc_f32(%c3,%c3, %f1) : (index, index, f32) -> memref<?x?xf32>
62+
%m4 = call @alloc_f32(%c3,%c3, %f1) : (index, index, f32) -> memref<?x?xf32>
63+
%m5 = call @alloc_f32(%c3,%c3, %f0) : (index, index, f32) -> memref<?x?xf32>
64+
65+
call @test(%m3, %m4, %m5) : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) -> ()
66+
67+
%printed_m5 = memref.cast %m5 : memref<?x?xf32> to memref<*xf32>
68+
69+
// CHECK: Unranked Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [3, 3] strides = [3, 1] data =
70+
// CHECK-NEXT: [
71+
// CHECK: [3{{(, 3)*}}]
72+
call @printMemrefF32(%printed_m5) : (memref<*xf32>) -> ()
73+
74+
return
75+
}

examples/BuddyMatmul/makefile

+18
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,21 @@ linalg-matmul-transpose-b-f32-run:
100100
-reconcile-unrealized-casts | \
101101
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
102102
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}
103+
104+
# Lowers ./linalg-matmul-f32.mlir with the matmul-parallel-vectorization pass
# followed by the lowering passes listed below, then pipes the result into the
# MLIR CPU runner to execute @main.
# A space is kept before every line-continuation backslash: make does not
# insert whitespace when recipe lines are joined, so the file name must be
# separated from the first flag explicitly.
linalg-matmul-run:
105+
@${BUDDY_OPT} ./linalg-matmul-f32.mlir \
106+
-matmul-parallel-vectorization \
107+
-convert-linalg-to-affine-loops \
108+
-lower-affine \
109+
-convert-vector-to-scf \
110+
-convert-scf-to-cf \
111+
-convert-vector-to-llvm \
112+
-convert-math-to-llvm \
113+
-convert-math-to-libm \
114+
-convert-arith-to-llvm \
115+
-convert-func-to-llvm \
116+
-expand-strided-metadata \
117+
-finalize-memref-to-llvm \
118+
-reconcile-unrealized-casts | \
119+
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
120+
-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

midend/lib/Conversion/MatMulOptimization/CMakeLists.txt

+6-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_mlir_library(MatMulOptimization
66
BatchMatMulTileOptimize.cpp
77
BatchMatMulSCFOptimize.cpp
88
MatMulTransposeBVec.cpp
9+
MatMulParallelVec.cpp
910
BatchMatMulOptimize.cpp
1011
BatchMatMulTileOptimize.cpp
1112
BatchMatMulSCFOptimize.cpp
@@ -22,5 +23,9 @@ add_mlir_library(MatMulParallelVectorization
2223
)
2324

2425
add_mlir_library(MatMulTransposeBVec
25-
MatMulTransposeBVec.cpp
26+
MatMulTransposeBVec.cpp
27+
)
28+
29+
add_mlir_library(MatMulParallelVec
30+
MatMulParallelVec.cpp
2631
)

0 commit comments

Comments
 (0)