@@ -117,32 +117,12 @@ pub(crate) fn gen_image_wrapper_module<'ll>(
117
117
llvm:: set_section ( llglobal, & c_section_name) ;
118
118
llvm:: set_alignment ( llglobal, Align :: EIGHT ) ;
119
119
120
- //@.omp_offloading.device_image = internal unnamed_addr constant [4040 x i8] c"111111111", section ".llvm.offloading", align 8
121
- // TODO
122
- // @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr getelementptr ([4040 x i8], ptr @.omp_offloading.device_image, i64 0, i64 144), ptr getelementptr ([4040 x i8], ptr @.omp_offloading.device_image, i64 0, i64 4040), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }]
123
- // @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }
124
- // @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }]
125
- //
126
- // define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
127
- // entry:
128
- // call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
129
- // %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
130
- // ret void
131
- // }
132
- //
133
- // define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" {
134
- // entry:
135
- // call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor)
136
- // ret void
137
- // }
138
-
139
120
llvm:: LLVMPrintModuleToFile (
140
121
llmod,
141
122
CString :: new ( "rustmagic.openmp.image.wrapper.ll" ) . unwrap ( ) . as_ptr ( ) ,
142
123
std:: ptr:: null_mut ( ) ,
143
124
) ;
144
125
145
- // Clean up
146
126
llvm:: LLVMDisposeModule ( llmod) ;
147
127
llvm:: LLVMContextDispose ( llcx) ;
148
128
}
@@ -154,7 +134,6 @@ pub(crate) fn handle_gpu_code<'ll>(
154
134
) {
155
135
let ( offload_entry_ty, at_one, begin, update, end, tgt_bin_desc, fn_ty) = gen_globals ( & cx) ;
156
136
157
- dbg ! ( "created struct" ) ;
158
137
let mut o_types = vec ! [ ] ;
159
138
let mut kernels = vec ! [ ] ;
160
139
for num in 0 ..9 {
@@ -164,13 +143,17 @@ pub(crate) fn handle_gpu_code<'ll>(
164
143
kernels. push ( kernel) ;
165
144
}
166
145
}
167
- dbg ! ( "gen_call_handling" ) ;
168
146
gen_call_handling ( & cx, & kernels, at_one, begin, update, end, tgt_bin_desc, fn_ty, & o_types) ;
169
147
gen_image_wrapper_module ( & cgcx, & cx) ;
170
- // In a follow-up PR, we will enable gpu device code generation.
171
- //gen_asdf(&cgcx, &cx);
172
148
}
173
149
150
+ // The meaning of the __tgt_offload_entry (as per llvm docs) is
151
+ // Type, Identifier, Description
152
+ // void*, addr, Address of global symbol within device image (function or global)
153
+ // char*, name, Name of the symbol
154
+ // size_t, size, Size of the entry info (0 if it is a function)
155
+ // int32_t, flags, Flags associated with the entry (see Target Region Entry Flags)
156
+ // int32_t, reserved, Reserved, to be used by the runtime library.
174
157
fn add_tgt_offload_entry < ' ll > ( cx : & ' ll SimpleCx < ' _ > ) -> & ' ll llvm:: Type {
175
158
let offload_entry_ty = cx. type_named_struct ( "struct.__tgt_offload_entry" ) ;
176
159
let tptr = cx. type_ptr ( ) ;
@@ -228,6 +211,7 @@ fn gen_globals<'ll>(
228
211
let tgt_bin_desc_name = cx. type_named_struct ( "struct.__tgt_bin_desc" ) ;
229
212
cx. set_struct_body ( tgt_bin_desc_name, & tgt_bin_desc_ty, false ) ;
230
213
214
+ // For each kernel to run on the gpu, we will later generate one entry of this type.
231
215
// copied from LLVM
232
216
// typedef struct {
233
217
// uint64_t Reserved;
@@ -244,25 +228,15 @@ fn gen_globals<'ll>(
244
228
vec ! [ ti32, ti32, tptr, tptr, tptr, tptr, tptr, tptr, ti64, ti64, tarr, tarr, ti32] ;
245
229
246
230
cx. set_struct_body ( kernel_arguments_ty, & kernel_elements, false ) ;
231
+ // For now we don't handle kernels, so we just add a global dummy
232
+ // to make sure that the __tgt_offload_entry is defined and handled correctly.
247
233
cx. declare_global ( "my_struct_global2" , kernel_arguments_ty) ;
248
- //@my_struct_global = external global %struct.__tgt_offload_entry
249
- //@my_struct_global2 = external global %struct.__tgt_kernel_arguments
250
- dbg ! ( & kernel_arguments_ty) ;
251
- //LLVMTypeRef elements[9] = {i64Ty, i16Ty, i16Ty, i32Ty, ptrTy, ptrTy, i64Ty, i64Ty, ptrTy};
252
- //LLVMStructSetBody(structTy, elements, 9, 0);
253
-
254
- // New, to test memtransfer
255
- // ; Function Attrs: nounwind
256
- // declare void @__tgt_target_data_begin_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) #3
257
- //
258
- // ; Function Attrs: nounwind
259
- // declare void @__tgt_target_data_update_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) #3
260
- //
261
- // ; Function Attrs: nounwind
262
- // declare void @__tgt_target_data_end_mapper(ptr, i64, i32, ptr, ptr, ptr, ptr, ptr, ptr) #3
263
234
235
+ // Move data to the gpu
264
236
let mapper_begin = "__tgt_target_data_begin_mapper" ;
237
+ // Update data on the gpu, currently not used.
265
238
let mapper_update = String :: from ( "__tgt_target_data_update_mapper" ) ;
239
+ // Move data from the GPU
266
240
let mapper_end = String :: from ( "__tgt_target_data_end_mapper" ) ;
267
241
let args = vec ! [ tptr, ti64, ti32, tptr, tptr, tptr, tptr, tptr, tptr] ;
268
242
let mapper_fn_ty = cx. type_func ( & args, cx. type_void ( ) ) ;
@@ -361,8 +335,6 @@ fn gen_define_handling<'ll>(
361
335
// Next: For each function, generate these three entries. A weak constant,
362
336
// the llvm.rodata entry name, and the omp_offloading_entries value
363
337
364
- // @.__omp_offloading_86fafab6_c40006a1__Z3fooPSt7complexIdES1_S0_m_l7.region_id = weak constant i8 0
365
- // @.offloading.entry_name = internal unnamed_addr constant [66 x i8] c"__omp_offloading_86fafab6_c40006a1__Z3fooPSt7complexIdES1_S0_m_l7\00", section ".llvm.rodata.offloading", align 1
366
338
let name = format ! ( ".kernel_{num}.region_id" ) ;
367
339
let initializer = cx. get_const_i8 ( 0 ) ;
368
340
let region_id = add_unnamed_global ( & cx, & name, initializer, WeakAnyLinkage ) ;
@@ -402,17 +374,6 @@ fn gen_define_handling<'ll>(
402
374
llvm:: set_alignment ( llglobal, Align :: ONE ) ;
403
375
let c_section_name = CString :: new ( ".omp_offloading_entries" ) . unwrap ( ) ;
404
376
llvm:: set_section ( llglobal, & c_section_name) ;
405
- // rustc
406
- // @.offloading.entry.kernel_3 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.kernel_3.region_id, ptr @.offloading.entry_name.3, i64 0, i64 0, ptr null }, section ".omp_offloading_entries", align 1
407
- // clang
408
- // @.offloading.entry.__omp_offloading_86fafab6_c40006a1__Z3fooPSt7complexIdES1_S0_m_l7 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.__omp_offloading_86fafab6_c40006a1__Z3fooPSt7complexIdES1_S0_m_l7.region_id, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "omp_offloading_entries", align 1
409
-
410
- //
411
- // 1. @.offload_sizes.{num} = private unnamed_addr constant [4 x i64] [i64 8, i64 0, i64 16, i64 0]
412
- // 2. @.offload_maptypes
413
- // 3. @.__omp_offloading_<hash>_fnc_name_<hash> = weak constant i8 0
414
- // 4. @.offloading.entry_name = internal unnamed_addr constant [66 x i8] c"__omp_offloading_86fafab6_c40006a1__Z3fooPSt7complexIdES1_S0_m_l7\00", section ".llvm.rodata.offloading", align 1
415
- // 5. @.offloading.entry.__omp_offloading_86fafab6_c40006a1__Z3fooPSt7complexIdES1_S0_m_l7 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.__omp_offloading_86fafab6_c40006a1__Z3fooPSt7complexIdES1_S0_m_l7.region_id, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "omp_offloading_entries", align 1
416
377
o_types
417
378
}
418
379
@@ -454,7 +415,6 @@ fn gen_call_handling<'ll>(
454
415
llvm:: LLVMRustGetFunctionCall ( main_fn, kernel_name. as_c_char_ptr ( ) , kernel_name. len ( ) )
455
416
} ;
456
417
let kernel_call = if call. is_some ( ) {
457
- dbg ! ( "found kernel call" ) ;
458
418
call. unwrap ( )
459
419
} else {
460
420
return ;
@@ -464,7 +424,6 @@ fn gen_call_handling<'ll>(
464
424
let mut builder = SBuilder :: build ( cx, kernel_call_bb) ;
465
425
466
426
let types = cx. func_params_types ( cx. get_type_of_global ( called) ) ;
467
- dbg ! ( & types) ;
468
427
let num_args = types. len ( ) as u64 ;
469
428
470
429
// Step 0)
0 commit comments