1
- From 177cce531fd3665bb964a03db51890e0241e3e72 Mon Sep 17 00:00:00 2001
1
+ From e80206b25bfc4120351bc7c42ac856d6b7257f01 Mon Sep 17 00:00:00 2001
2
2
From: Alexey Sotkin <
[email protected] >
3
3
Date: Thu, 21 Feb 2019 17:14:36 +0300
4
4
Subject: [PATCH] Update LowerOpenCL pass to handle new blocks represntation in
@@ -9,8 +9,8 @@ Subject: [PATCH] Update LowerOpenCL pass to handle new blocks represntation in
9
9
test/global_block.ll | 71 ++++-----
10
10
test/literal-struct.ll | 31 ++--
11
11
test/transcoding/block_w_struct_return.ll | 47 +++---
12
- test/transcoding/enqueue_kernel.ll | 237 ++++++++++++++++------------
13
- 5 files changed, 235 insertions(+), 400 deletions(-)
12
+ test/transcoding/enqueue_kernel.ll | 248 ++++++++++++++++- ------------
13
+ 5 files changed, 235 insertions(+), 411 deletions(-)
14
14
15
15
diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
16
16
index c80bf04..b42a4ec 100644
@@ -602,10 +602,10 @@ index a68820f..ebd2c5f 100644
602
602
+ !5 = !{!"int*"}
603
603
+ !6 = !{!""}
604
604
diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll
605
- index 1f0b360..761043e 100644
605
+ index 23b230a..c164d37 100644
606
606
--- a/test/transcoding/enqueue_kernel.ll
607
607
+++ b/test/transcoding/enqueue_kernel.ll
608
- @@ -51 ,11 +51 ,12 @@
608
+ @@ -57 ,11 +57 ,12 @@
609
609
; ModuleID = 'enqueue_kernel.cl'
610
610
source_filename = "enqueue_kernel.cl"
611
611
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
@@ -619,7 +619,7 @@ index 1f0b360..761043e 100644
619
619
620
620
; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel"
621
621
; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel"
622
- @@ -66 ,89 +67 ,123 @@ target triple = "spir-unknown-unknown"
622
+ @@ -73 ,89 +74 ,123 @@ target triple = "spir-unknown-unknown"
623
623
624
624
; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
625
625
; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
@@ -649,10 +649,10 @@ index 1f0b360..761043e 100644
649
649
- ; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }>
650
650
- ; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
651
651
- ; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32 }>
652
- + ; CHECK-LLVM: [[BlockTy1:%[0-9a-z\. ]+]] = type { i32, i32, i8 addrspace(4)* }
653
- + ; CHECK-LLVM: [[BlockTy2:%[0-9a-z\. ]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
654
- + ; CHECK-LLVM: [[BlockTy3:%[0-9a-z\. ]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
655
- + ; CHECK-LLVM: [[BlockTy4:%[0-9a-z\. ]+]] = type <{ i32, i32, i8 addrspace(4)* }>
652
+ + ; CHECK-LLVM: [[BlockTy1:%[0-9 ]+]] = type { i32, i32, i8 addrspace(4)* }
653
+ + ; CHECK-LLVM: [[BlockTy2:%[0-9 ]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
654
+ + ; CHECK-LLVM: [[BlockTy3:%[0-9 ]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
655
+ + ; CHECK-LLVM: [[BlockTy4:%[0-9 ]+]] = type <{ i32, i32, i8 addrspace(4)* }>
656
656
657
657
- ; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
658
658
- ; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
@@ -740,7 +740,7 @@ index 1f0b360..761043e 100644
740
740
+ ; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic*
741
741
+ ; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
742
742
; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
743
- - ; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events (%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null , i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
743
+ - ; CHECK-LLVM: call i32 @__enqueue_kernel_basic (%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
744
744
-
745
745
- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4)
746
746
- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
@@ -787,7 +787,7 @@ index 1f0b360..761043e 100644
787
787
788
788
; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
789
789
; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
790
- @@ -158 ,16 +193 ,24 @@ entry:
790
+ @@ -165 ,16 +200 ,24 @@ entry:
791
791
; [[ConstInt2]] [[Event1]] [[Event2]]
792
792
; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]]
793
793
@@ -821,7 +821,7 @@ index 1f0b360..761043e 100644
821
821
822
822
; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
823
823
; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
824
- @@ -182 ,14 +225 ,18 @@ entry:
824
+ @@ -189 ,14 +232 ,18 @@ entry:
825
825
; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)*
826
826
; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
827
827
@@ -848,11 +848,22 @@ index 1f0b360..761043e 100644
848
848
849
849
; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
850
850
; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
851
- @@ -206,24 +253 ,27 @@ entry:
851
+ @@ -213,35 +260 ,27 @@ entry:
852
852
; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
853
- ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs (%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null , i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
853
+ ; CHECK-LLVM: call i32 @__enqueue_kernel_varargs (%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
854
854
855
855
- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17)
856
+ -
857
+ - ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit2:[0-9]+]]
858
+ - ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}}
859
+ - ; [[ConstInt0]] [[EventNull]] [[Event1]]
860
+ - ; [[BlockKer5]] [[BlockLit5]] [[ConstInt20]] [[ConstInt8]]
861
+ -
862
+ - ; CHECK-LLVM: [[BlockInv5:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5_kernel to i8 addrspace(4)*
863
+ - ; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv5]], i8 addrspace(4)* [[Block3]])
864
+ -
865
+ - %21 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
866
+ -
856
867
+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
857
868
ret void
858
869
}
@@ -883,7 +894,7 @@ index 1f0b360..761043e 100644
883
894
%2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
884
895
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
885
896
store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
886
- @@ -243 ,19 +293 ,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
897
+ @@ -261 ,19 +300 ,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
887
898
define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 {
888
899
entry:
889
900
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
@@ -910,7 +921,7 @@ index 1f0b360..761043e 100644
910
921
%4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
911
922
%arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
912
923
store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
913
- @@ -276 ,11 +326 ,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
924
+ @@ -294 ,11 +333 ,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
914
925
entry:
915
926
%.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
916
927
%p.addr = alloca i8 addrspace(3)*, align 4
@@ -925,7 +936,7 @@ index 1f0b360..761043e 100644
925
936
ret void
926
937
}
927
938
928
- @@ -300 ,13 +350 ,13 @@ entry:
939
+ @@ -318 ,13 +357 ,13 @@ entry:
929
940
%p1.addr = alloca i8 addrspace(3)*, align 4
930
941
%p2.addr = alloca i8 addrspace(3)*, align 4
931
942
%p3.addr = alloca i8 addrspace(3)*, align 4
@@ -942,9 +953,9 @@ index 1f0b360..761043e 100644
942
953
ret void
943
954
}
944
955
945
- @@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*,
946
- ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
956
+ @@ -379,27 +418,20 @@ entry:
947
957
; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
958
+ ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_5_kernel(i8 addrspace(4)*)
948
959
949
960
- attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
950
961
+ attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
0 commit comments