Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/infiniop/devices/handle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
#ifdef ENABLE_METAX_API
#include "metax/metax_handle.h"
#endif
#ifdef ENABLE_OPENCL_API
#include "opencl/opencl_handle.h"
#endif

__C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) {
if (handle_ptr == nullptr) {
Expand Down Expand Up @@ -68,6 +71,9 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) {
#ifdef ENABLE_HYGON_API
CREATE(INFINI_DEVICE_HYGON, hygon);
#endif
#ifdef ENABLE_OPENCL_API
CREATE(INFINI_DEVICE_OPENCL, opencl);
#endif

default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down Expand Up @@ -113,6 +119,9 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) {
#endif
#ifdef ENABLE_HYGON_API
DELETE(INFINI_DEVICE_HYGON, hygon);
#endif
#ifdef ENABLE_OPENCL_API
DELETE(INFINI_DEVICE_OPENCL, opencl);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
Expand Down
39 changes: 39 additions & 0 deletions src/infiniop/devices/opencl/opencl_common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef __INFINIOP_OPENCL_COMMON_H__
#define __INFINIOP_OPENCL_COMMON_H__

#include "../../../utils.h"
#include "../pool.h"
#include "opencl_handle.h"
#include "opencl_kernel_common.h"
#include "opencl_program_cache.h"
#include <functional>
#include <vector>

namespace device::opencl {

/// Device-level state attached to an OpenCL `Handle`.
///
/// Stores the launch-geometry limits exposed through the accessors below and
/// owns the `ProgramCache` returned by `programCache()`.
class Handle::Internal {
private:
    // Width reported by warpSize().
    int _warp_size;
    // Limit reported by maxThreadsPerBlock().
    int _max_threads_per_block;
    // Per-axis limits reported by blockSizeX/Y/Z(), indexed x = 0, y = 1, z = 2.
    int _block_size[3];

    // Shorthand for a callable that receives a T and yields an infiniStatus_t.
    template <typename T>
    using Fn = std::function<infiniStatus_t(T)>;

public:
    // Constructs the state; the int argument is a device id.
    Internal(int);

    // Read-only views of the stored limits.
    int warpSize() const;
    int maxThreadsPerBlock() const;
    int blockSizeX() const;
    int blockSizeY() const;
    int blockSizeZ() const;

    // Non-owning pointer to the cache held by this object.
    ProgramCache *programCache() const;

private:
    // Owned program cache; released together with this object.
    std::unique_ptr<ProgramCache> program_cache_;
};

} // namespace device::opencl

#endif // __INFINIOP_OPENCL_COMMON_H__
59 changes: 59 additions & 0 deletions src/infiniop/devices/opencl/opencl_handle.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include "../../../infinirt/opencl/infinirt_opencl.h"
#include "opencl_common.h"

namespace device::opencl {
Handle::Handle(infiniDevice_t device, int device_id)
: InfiniopHandle{device, device_id},
_internal(std::make_shared<Handle::Internal>(device_id)) {}

Handle::Handle(int device_id) : Handle(INFINI_DEVICE_OPENCL, device_id) {}

auto Handle::internal() const -> const std::shared_ptr<Internal> & {
return _internal;
}

/// Initializes the runtime, looks up the OpenCL device and caches its launch
/// limits.
///
/// Populates:
///  - `_warp_size`: a compile-time constant selected by the INTEL / ADRENO
///    macros (it is not queried from the device);
///  - `_max_threads_per_block`: CL_DEVICE_MAX_WORK_GROUP_SIZE;
///  - `_block_size[0..2]`: the first three CL_DEVICE_MAX_WORK_ITEM_SIZES;
///  - `program_cache_`: a fresh, empty ProgramCache.
///
/// A constructor cannot report a status, so any failed device query leaves the
/// corresponding limit at 0 instead of an indeterminate value.
///
/// @param device_id Device id supplied by the owning Handle.
///        NOTE(review): it is not consulted here; the device comes from
///        infinirtGetOpenclDevice(), presumably the runtime's current device —
///        confirm that the runtime has already selected `device_id`.
Handle::Internal::Internal(int device_id) {
    (void)device_id;

    infinirtInit();

    // Value-initialize so a failed lookup yields a null device rather than garbage.
    infinirtOpenclDevice_t device{};
    infinirtGetOpenclDevice(&device);
    const cl_device_id cl_device = static_cast<cl_device_id>(device);

#if defined(INTEL)
    _warp_size = 32;
#elif defined(ADRENO)
    _warp_size = 128;
#else
    _warp_size = 32;
#endif

    size_t device_max_wg = 0;
    if (clGetDeviceInfo(cl_device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                        sizeof(device_max_wg), &device_max_wg, nullptr)
        != CL_SUCCESS) {
        device_max_wg = 0;
    }
    _max_threads_per_block = static_cast<int>(device_max_wg);

    // CL_DEVICE_MAX_WORK_ITEM_SIZES returns one entry per work-item dimension.
    // The buffer must be large enough for all of them, otherwise the query
    // fails with CL_INVALID_VALUE, so ask for the dimension count first.
    cl_uint work_item_dims = 0;
    if (clGetDeviceInfo(cl_device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                        sizeof(work_item_dims), &work_item_dims, nullptr)
            != CL_SUCCESS
        || work_item_dims < 3) {
        // Always keep at least three slots because three are read below.
        work_item_dims = 3;
    }

    std::vector<size_t> max_item_sizes(work_item_dims, 0);
    if (clGetDeviceInfo(cl_device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                        max_item_sizes.size() * sizeof(size_t),
                        max_item_sizes.data(), nullptr)
        != CL_SUCCESS) {
        max_item_sizes.assign(max_item_sizes.size(), 0);
    }

    for (int axis = 0; axis < 3; ++axis) {
        _block_size[axis] = static_cast<int>(max_item_sizes[axis]);
    }

    program_cache_ = std::make_unique<ProgramCache>();
}

// Warp / sub-group width stored at construction.
int Handle::Internal::warpSize() const {
    return _warp_size;
}

// Maximum work-group size stored at construction.
int Handle::Internal::maxThreadsPerBlock() const {
    return _max_threads_per_block;
}

// Stored per-axis limit, axis 0.
int Handle::Internal::blockSizeX() const {
    return _block_size[0];
}

// Stored per-axis limit, axis 1.
int Handle::Internal::blockSizeY() const {
    return _block_size[1];
}

// Stored per-axis limit, axis 2.
int Handle::Internal::blockSizeZ() const {
    return _block_size[2];
}

// Non-owning pointer to the program cache; ownership stays with this object.
ProgramCache *Handle::Internal::programCache() const {
    return program_cache_.get();
}

// Heap-allocates an OpenCL handle for `device_id` and stores it in
// `*handle_ptr`. Always reports INFINI_STATUS_SUCCESS; `handle_ptr` is
// dereferenced without a null check. Ownership of the new object passes to
// the caller (presumably released through infiniopDestroyHandle — confirm).
infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) {
    auto *opencl_handle = new Handle(INFINI_DEVICE_OPENCL, device_id);
    *handle_ptr = opencl_handle;
    return INFINI_STATUS_SUCCESS;
}

} // namespace device::opencl
28 changes: 28 additions & 0 deletions src/infiniop/devices/opencl/opencl_handle.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef __INFINIOP_OPENCL_HANDLE_H__
#define __INFINIOP_OPENCL_HANDLE_H__

#include "../../handle.h"
#include <memory>

namespace device {
namespace opencl {

// OpenCL specialization of InfiniopHandle. Device-specific state is kept in a
// separately defined Internal object that is held through a shared_ptr.
struct Handle : public InfiniopHandle {
    // Device state; only forward-declared in this header.
    class Internal;

    // Creates a handle for the given device id.
    Handle(int device_id);

    // Factory: writes a newly allocated handle for `device_id` to `*handle_ptr`.
    static infiniStatus_t create(InfiniopHandle **handle_ptr, int device_id);

    // Read-only access to the shared Internal state.
    auto internal() const -> const std::shared_ptr<Internal> &;

protected:
    // Constructor that also takes the device type to record in the base.
    Handle(infiniDevice_t device, int device_id);

private:
    std::shared_ptr<Internal> _internal;
};

} // namespace opencl
} // namespace device

#endif // __INFINIOP_OPENCL_HANDLE_H__
225 changes: 225 additions & 0 deletions src/infiniop/devices/opencl/opencl_kernel_common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
#ifndef __INFINIOP_OPENCL_KERNEL_COMMON_H__
#define __INFINIOP_OPENCL_KERNEL_COMMON_H__

#include "infinicore.h"
#include <cstddef>
#include <string>

#ifndef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 300
#endif
#include <CL/cl.h>

namespace device::opencl::kernel {

// Size in bytes of one element of `dtype`.
// Returns 0 for any dtype that is not listed below.
inline size_t dtypeSize(infiniDtype_t dtype) {
    switch (dtype) {
    // One-byte element types.
    case INFINI_DTYPE_BYTE:
    case INFINI_DTYPE_BOOL:
    case INFINI_DTYPE_I8:
    case INFINI_DTYPE_U8:
        return 1;

    // Two-byte element types.
    case INFINI_DTYPE_I16:
    case INFINI_DTYPE_U16:
    case INFINI_DTYPE_F16:
        return 2;

    // Four-byte element types.
    case INFINI_DTYPE_I32:
    case INFINI_DTYPE_U32:
    case INFINI_DTYPE_F32:
        return 4;

    // Eight-byte element types.
    case INFINI_DTYPE_I64:
    case INFINI_DTYPE_U64:
    case INFINI_DTYPE_F64:
        return 8;

    default:
        return 0;
    }
}

// Converts a row-major flat element index into a strided element offset.
//
// flat_index : linear index over the logical shape (last axis varies fastest)
// ndim       : number of axes; 0 yields offset 0
// shape      : extent of each axis (`ndim` entries, each must be non-zero)
// strides    : element stride of each axis (`ndim` entries)
//
// The sum is accumulated in size_t, so signed strides are converted to
// unsigned and negative contributions wrap modulo 2^N.
inline size_t indexToOffset(
    size_t flat_index,
    size_t ndim,
    const size_t *shape,
    const ptrdiff_t *strides) {
    size_t offset = 0;
    size_t remaining = flat_index;
    // Peel coordinates off from the innermost axis outwards.
    for (size_t axis = ndim; axis > 0; --axis) {
        const size_t dim = axis - 1;
        const size_t extent = shape[dim];
        const size_t coord = remaining % extent;
        offset += coord * strides[dim];
        remaining /= extent;
    }
    return offset;
}

// Maps an OpenCL status code to the name of its macro.
// Codes that are not in the table yield "UNKNOWN_CL_ERROR".
// The returned pointer refers to a string literal and must not be freed.
inline const char *clErrorString(cl_int err) {
    struct Entry {
        cl_int code;
        const char *name;
    };

    static constexpr Entry kKnownErrors[] = {
        {CL_SUCCESS, "CL_SUCCESS"},
        {CL_DEVICE_NOT_FOUND, "CL_DEVICE_NOT_FOUND"},
        {CL_DEVICE_NOT_AVAILABLE, "CL_DEVICE_NOT_AVAILABLE"},
        {CL_COMPILER_NOT_AVAILABLE, "CL_COMPILER_NOT_AVAILABLE"},
        {CL_MEM_OBJECT_ALLOCATION_FAILURE, "CL_MEM_OBJECT_ALLOCATION_FAILURE"},
        {CL_OUT_OF_RESOURCES, "CL_OUT_OF_RESOURCES"},
        {CL_OUT_OF_HOST_MEMORY, "CL_OUT_OF_HOST_MEMORY"},
        {CL_PROFILING_INFO_NOT_AVAILABLE, "CL_PROFILING_INFO_NOT_AVAILABLE"},
        {CL_MEM_COPY_OVERLAP, "CL_MEM_COPY_OVERLAP"},
        {CL_IMAGE_FORMAT_MISMATCH, "CL_IMAGE_FORMAT_MISMATCH"},
        {CL_IMAGE_FORMAT_NOT_SUPPORTED, "CL_IMAGE_FORMAT_NOT_SUPPORTED"},
        {CL_BUILD_PROGRAM_FAILURE, "CL_BUILD_PROGRAM_FAILURE"},
        {CL_MAP_FAILURE, "CL_MAP_FAILURE"},
        {CL_INVALID_VALUE, "CL_INVALID_VALUE"},
        {CL_INVALID_DEVICE_TYPE, "CL_INVALID_DEVICE_TYPE"},
        {CL_INVALID_PLATFORM, "CL_INVALID_PLATFORM"},
        {CL_INVALID_DEVICE, "CL_INVALID_DEVICE"},
        {CL_INVALID_CONTEXT, "CL_INVALID_CONTEXT"},
        {CL_INVALID_QUEUE_PROPERTIES, "CL_INVALID_QUEUE_PROPERTIES"},
        {CL_INVALID_COMMAND_QUEUE, "CL_INVALID_COMMAND_QUEUE"},
        {CL_INVALID_HOST_PTR, "CL_INVALID_HOST_PTR"},
        {CL_INVALID_MEM_OBJECT, "CL_INVALID_MEM_OBJECT"},
        {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"},
        {CL_INVALID_IMAGE_SIZE, "CL_INVALID_IMAGE_SIZE"},
        {CL_INVALID_SAMPLER, "CL_INVALID_SAMPLER"},
        {CL_INVALID_BINARY, "CL_INVALID_BINARY"},
        {CL_INVALID_BUILD_OPTIONS, "CL_INVALID_BUILD_OPTIONS"},
        {CL_INVALID_PROGRAM, "CL_INVALID_PROGRAM"},
        {CL_INVALID_PROGRAM_EXECUTABLE, "CL_INVALID_PROGRAM_EXECUTABLE"},
        {CL_INVALID_KERNEL_NAME, "CL_INVALID_KERNEL_NAME"},
        {CL_INVALID_KERNEL_DEFINITION, "CL_INVALID_KERNEL_DEFINITION"},
        {CL_INVALID_KERNEL, "CL_INVALID_KERNEL"},
        {CL_INVALID_ARG_INDEX, "CL_INVALID_ARG_INDEX"},
        {CL_INVALID_ARG_VALUE, "CL_INVALID_ARG_VALUE"},
        {CL_INVALID_ARG_SIZE, "CL_INVALID_ARG_SIZE"},
        {CL_INVALID_KERNEL_ARGS, "CL_INVALID_KERNEL_ARGS"},
        {CL_INVALID_WORK_DIMENSION, "CL_INVALID_WORK_DIMENSION"},
        {CL_INVALID_WORK_GROUP_SIZE, "CL_INVALID_WORK_GROUP_SIZE"},
        {CL_INVALID_WORK_ITEM_SIZE, "CL_INVALID_WORK_ITEM_SIZE"},
        {CL_INVALID_GLOBAL_OFFSET, "CL_INVALID_GLOBAL_OFFSET"},
        {CL_INVALID_EVENT_WAIT_LIST, "CL_INVALID_EVENT_WAIT_LIST"},
        {CL_INVALID_EVENT, "CL_INVALID_EVENT"},
        {CL_INVALID_OPERATION, "CL_INVALID_OPERATION"},
        {CL_INVALID_GL_OBJECT, "CL_INVALID_GL_OBJECT"},
        {CL_INVALID_BUFFER_SIZE, "CL_INVALID_BUFFER_SIZE"},
        {CL_INVALID_MIP_LEVEL, "CL_INVALID_MIP_LEVEL"},
        {CL_INVALID_GLOBAL_WORK_SIZE, "CL_INVALID_GLOBAL_WORK_SIZE"},
    };

    for (const Entry &entry : kKnownErrors) {
        if (entry.code == err) {
            return entry.name;
        }
    }
    return "UNKNOWN_CL_ERROR";
}

// Writes the OpenCL C scalar type name for `dt` into `out`.
//
// Returns true and assigns `out` for the integer, bool and floating-point
// dtypes listed below. Returns false and leaves `out` untouched for every
// other dtype (INVALID, BYTE, F8, the complex types, BF16, unknown values).
inline bool dtypeToClType(infiniDtype_t dt, std::string &out) noexcept {
    const char *cl_name = nullptr;

    switch (dt) {
    case INFINI_DTYPE_BOOL:
        cl_name = "bool";
        break;
    case INFINI_DTYPE_I8:
        cl_name = "char";
        break;
    case INFINI_DTYPE_I16:
        cl_name = "short";
        break;
    case INFINI_DTYPE_I32:
        cl_name = "int";
        break;
    case INFINI_DTYPE_I64:
        cl_name = "long";
        break;
    case INFINI_DTYPE_U8:
        cl_name = "uchar";
        break;
    case INFINI_DTYPE_U16:
        cl_name = "ushort";
        break;
    case INFINI_DTYPE_U32:
        cl_name = "uint";
        break;
    case INFINI_DTYPE_U64:
        cl_name = "ulong";
        break;
    case INFINI_DTYPE_F16:
        // half requires cl_khr_fp16 support
        cl_name = "half";
        break;
    case INFINI_DTYPE_F32:
        cl_name = "float";
        break;
    case INFINI_DTYPE_F64:
        // double requires cl_khr_fp64 support
        cl_name = "double";
        break;
    default:
        // No OpenCL C name is provided for the remaining dtypes.
        break;
    }

    if (cl_name == nullptr) {
        return false;
    }
    out = cl_name;
    return true;
}

} // namespace device::opencl::kernel

#endif
Loading