Open
Description
I'm working on Julia support for oneAPI, and after upgrading to an A770 I noticed that printing an empty string segfaults.
Julia MWE:
using oneAPI
# Kernel whose only action is a device-side printf with an empty format string.
function kernel()
oneAPI.@printf("")
return
end
# Launch the kernel on the device.
@oneapi kernel()
# Wait for the kernel to finish; the backtrace below shows the segfault
# happening inside this call (zeCommandQueueSynchronize -> printKernelOutput).
synchronize()
signal (11): Segmentation fault
in expression starting at /home/tim/Julia/pkg/oneAPI/wip.jl:13
strnlen_s at /workspace/srcdir/compute-runtime/shared/source/helpers/string.h:40
printString at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:54
printKernelOutput at /workspace/srcdir/compute-runtime/shared/source/program/print_formatter.cpp:48
printOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/printf_handler/printf_handler.cpp:76
printPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/kernel/kernel_imp.cpp:974
printKernelsPrintfOutput at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:178
postSyncOperations at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:184
synchronizeByPollingForTaskCount at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:171
synchronize at /workspace/srcdir/compute-runtime/level_zero/core/source/cmdqueue/cmdqueue.cpp:147
zeCommandQueueSynchronize at /workspace/srcdir/compute-runtime/level_zero/api/core/ze_cmdqueue_api_entrypoints.h:39
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/libze.jl:1556 [inlined]
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/utils.jl:5 [inlined]
macro expansion at /home/tim/Julia/pkg/oneAPI/lib/level-zero/libze.jl:13 [inlined]
zeCommandQueueSynchronize at /home/tim/Julia/pkg/oneAPI/lib/utils/call.jl:24
synchronize at /home/tim/Julia/pkg/oneAPI/lib/level-zero/cmdqueue.jl:34 [inlined]
synchronize at /home/tim/Julia/pkg/oneAPI/lib/level-zero/cmdqueue.jl:34 [inlined]
synchronize at /home/tim/Julia/pkg/oneAPI/src/context.jl:59
main at /home/tim/Julia/pkg/oneAPI/wip.jl:10
unknown function (ip: 0x7ff371f1767f)
The kernel above generates the following LLVM IR:
julia> oneAPI.code_llvm(kernel, Tuple{}; kernel=true, dump_module=true, debuginfo=:none)
; ModuleID = 'text'
source_filename = "text"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-unknown"
@0 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
declare i32 @printf(i8*, ...)
define spir_kernel void @_Z6kernel() local_unnamed_addr #0 {
conversion:
%0 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @0, i64 0, i64 0))
ret void
}
attributes #0 = { "probe-stack"="inline-asm" }
!llvm.module.flags = !{!0, !1}
!opencl.ocl.version = !{!2}
!opencl.spirv.version = !{!3}
!julia.kernel = !{!4}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 2, i32 0}
!3 = !{i32 1, i32 5}
!4 = !{void ()* @_Z6kernel}
... which we translate to SPIR-V using the Khronos translator:
; SPIR-V
; Version: 1.0
; Generator: Khronos LLVM/SPIR-V Translator; 14
; Bound: 18
; Schema: 0
OpCapability Addresses
OpCapability Kernel
OpCapability Int64
OpCapability Int8
%1 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %11 "_Z6kernel"
OpSource OpenCL_C 200000
OpName %conversion "conversion"
OpDecorate %8 Constant
OpDecorate %8 Alignment 1
%ulong = OpTypeInt 64 0
%uchar = OpTypeInt 8 0
%uint = OpTypeInt 32 0
%ulong_1 = OpConstant %ulong 1
%ulong_0 = OpConstant %ulong 0
%_arr_uchar_ulong_1 = OpTypeArray %uchar %ulong_1
%_ptr_Function__arr_uchar_ulong_1 = OpTypePointer Function %_arr_uchar_ulong_1
%void = OpTypeVoid
%10 = OpTypeFunction %void
%_ptr_Function_uchar = OpTypePointer Function %uchar
%6 = OpConstantNull %_arr_uchar_ulong_1
%8 = OpVariable %_ptr_Function__arr_uchar_ulong_1 Function %6
%11 = OpFunction %void None %10
%conversion = OpLabel
%15 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %8 %ulong_0 %ulong_0
%17 = OpExtInst %uint %1 printf %15
OpReturn
OpFunctionEnd
The compiled SPIR-V kernel is attached, and can be loaded (after extracting) using the following C-based loader:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <level_zero/ze_api.h>
#include <level_zero/zet_api.h>
/*
 * Read the whole file `filename` into a freshly malloc'ed buffer.
 *
 * On success, `*spirv` points to the file contents (the caller must free() it)
 * and `*spirv_size` holds the number of bytes read. `*spirv` is non-NULL even
 * for an empty file.
 *
 * On any I/O or allocation failure, a diagnostic is printed to stderr and the
 * process exits with EXIT_FAILURE. Errors are checked explicitly rather than
 * with assert() so that they are still caught when built with -DNDEBUG.
 */
void read_spirv_binary(const char *filename, uint8_t **spirv, size_t *spirv_size) {
    FILE *file = fopen(filename, "rb");
    if (file == NULL) {
        perror(filename);
        exit(EXIT_FAILURE);
    }

    /* Determine the file size; ftell() returns -1 on failure, which must not
     * be converted to size_t unchecked. */
    long length = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        length = ftell(file);
    }
    if (length < 0 || fseek(file, 0, SEEK_SET) != 0) {
        perror(filename);
        fclose(file);
        exit(EXIT_FAILURE);
    }

    size_t size = (size_t)length;

    /* malloc(0) may legally return NULL, so always request at least one byte. */
    uint8_t *buffer = (uint8_t *)malloc(size > 0 ? size : 1);
    if (buffer == NULL) {
        fprintf(stderr, "%s: out of memory allocating %lu bytes\n",
                filename, (unsigned long)size);
        fclose(file);
        exit(EXIT_FAILURE);
    }

    if (fread(buffer, 1, size, file) != size) {
        fprintf(stderr, "%s: short read\n", filename);
        free(buffer);
        fclose(file);
        exit(EXIT_FAILURE);
    }

    fclose(file);
    *spirv = buffer;
    *spirv_size = size;
}
/*
 * Exit with a readable message when a Level Zero call does not succeed.
 * Unlike assert(), this check is still active when built with -DNDEBUG.
 */
static void check_ze(ze_result_t result, const char *call) {
    if (result != ZE_RESULT_SUCCESS) {
        fprintf(stderr, "%s failed with ze_result_t 0x%x\n", call, (unsigned)result);
        exit(EXIT_FAILURE);
    }
}

/*
 * Minimal Level Zero loader: builds a module from the SPIR-V binary given as
 * argv[1], launches the kernel named "_Z6kernel" with a single work-group on
 * the first device of the first driver, and synchronizes the command queue.
 *
 * Returns 0 on success and 1 on a usage error; any failing Level Zero call
 * terminates the process with EXIT_FAILURE.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <path to SPIR-V binary>\n", argv[0]);
        return 1;
    }

    check_ze(zeInit(0), "zeInit");

    /* Query how many drivers exist, then fetch exactly one. The count must be
     * clamped to 1 before the second call: only a single handle's worth of
     * storage is supplied, and the runtime writes up to `count` handles. */
    uint32_t driver_count = 0;
    check_ze(zeDriverGet(&driver_count, NULL), "zeDriverGet (count)");
    if (driver_count == 0) {
        fprintf(stderr, "No Level Zero drivers found\n");
        return EXIT_FAILURE;
    }
    ze_driver_handle_t driver;
    driver_count = 1;
    check_ze(zeDriverGet(&driver_count, &driver), "zeDriverGet");

    /* Same pattern for devices: query the count, then request only the first. */
    uint32_t device_count = 0;
    check_ze(zeDeviceGet(driver, &device_count, NULL), "zeDeviceGet (count)");
    if (device_count == 0) {
        fprintf(stderr, "No Level Zero devices found\n");
        return EXIT_FAILURE;
    }
    ze_device_handle_t device;
    device_count = 1;
    check_ze(zeDeviceGet(driver, &device_count, &device), "zeDeviceGet");

    ze_context_handle_t context;
    ze_context_desc_t context_desc = {
        .stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC,
        .pNext = NULL,
        .flags = 0
    };
    check_ze(zeContextCreate(driver, &context_desc, &context), "zeContextCreate");

    /* Load the SPIR-V binary and build a module from it. */
    uint8_t *spirv;
    size_t spirv_size;
    read_spirv_binary(argv[1], &spirv, &spirv_size);

    ze_module_handle_t module;
    ze_module_desc_t module_desc = {
        .stype = ZE_STRUCTURE_TYPE_MODULE_DESC,
        .pNext = NULL,
        .format = ZE_MODULE_FORMAT_IL_SPIRV,
        .inputSize = spirv_size,
        .pInputModule = spirv,
        .pBuildFlags = ""
    };
    check_ze(zeModuleCreate(context, device, &module_desc, &module, NULL), "zeModuleCreate");

    ze_kernel_handle_t kernel;
    ze_kernel_desc_t kernel_desc = {
        .stype = ZE_STRUCTURE_TYPE_KERNEL_DESC,
        .pNext = NULL,
        .flags = 0,
        .pKernelName = "_Z6kernel"
    };
    check_ze(zeKernelCreate(module, &kernel_desc, &kernel), "zeKernelCreate");

    ze_command_queue_handle_t cmd_queue;
    ze_command_queue_desc_t cmd_queue_desc = {
        .stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
        .pNext = NULL,
        .ordinal = 0,
        .index = 0,
        .flags = 0,
        .mode = ZE_COMMAND_QUEUE_MODE_DEFAULT,
        .priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL
    };
    check_ze(zeCommandQueueCreate(context, device, &cmd_queue_desc, &cmd_queue),
             "zeCommandQueueCreate");

    ze_command_list_handle_t cmd_list;
    ze_command_list_desc_t cmd_list_desc = {
        .stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
        .pNext = NULL,
        .commandQueueGroupOrdinal = 0,
        .flags = 0
    };
    check_ze(zeCommandListCreate(context, device, &cmd_list_desc, &cmd_list),
             "zeCommandListCreate");

    /* Record a single 1x1x1 launch of the kernel, then submit it. */
    ze_group_count_t group_count = {
        .groupCountX = 1,
        .groupCountY = 1,
        .groupCountZ = 1
    };
    check_ze(zeCommandListAppendLaunchKernel(cmd_list, kernel, &group_count, NULL, 0, NULL),
             "zeCommandListAppendLaunchKernel");
    check_ze(zeCommandListClose(cmd_list), "zeCommandListClose");
    check_ze(zeCommandQueueExecuteCommandLists(cmd_queue, 1, &cmd_list, NULL),
             "zeCommandQueueExecuteCommandLists");

    /* NOTE(review): the timeout is UINT32_MAX, not UINT64_MAX; if the Level Zero
     * timeout is in nanoseconds this is a wait of roughly 4.3 s rather than an
     * unbounded one -- confirm this is intended. */
    check_ze(zeCommandQueueSynchronize(cmd_queue, UINT32_MAX), "zeCommandQueueSynchronize");

    zeKernelDestroy(kernel);
    zeModuleDestroy(module);
    free(spirv);
    zeCommandListDestroy(cmd_list);
    zeCommandQueueDestroy(cmd_queue);
    zeContextDestroy(context);
    return 0;
}
Tested on Linux 6.2.11, using both compute-runtime 22.43.24595.30 from the Arch Linux repos and our own build of 22.53.25593. Printing non-empty strings works, as does printing empty strings on another system of mine (a NUC with Xe graphics, running Linux 5.10 with compute-runtime 22.53.25593).