forked from ggml-org/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
384 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,46 @@ | ||
if(NOT SOC_VERSION) | ||
set(SOC_VERSION "ascend910b3") | ||
if (NOT SOC_TYPE) | ||
set (SOC_TYPE "Ascend910B3") | ||
endif() | ||
set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR}) | ||
set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim/cpu") | ||
|
||
if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) | ||
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) | ||
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) | ||
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) | ||
file(GLOB SRC_FILES | ||
dequantize_q4_0.cpp | ||
quantize_q4_0.cpp | ||
) | ||
|
||
string(TOLOWER "${CMAKE_BUILD_TYPE}" lowercase_CMAKE_BUILD_TYPE) | ||
if(${lowercase_CMAKE_BUILD_TYPE} STREQUAL "debug") | ||
if (NOT DEFINED ENV{CMAKE_PREFIX_PATH}) | ||
set(CMAKE_PREFIX_PATH ${CANN_INSTALL_DIR}/tools/tikicpulib/lib/cmake) | ||
endif() | ||
|
||
find_package(tikicpulib REQUIRED) | ||
add_library(cann_kernels ${SRC_FILES} ascendc_kernels.cpp) | ||
target_link_libraries(cann_kernels PRIVATE | ||
ascendcl | ||
tikicpulib::ascend910B1 | ||
) | ||
|
||
target_compile_features(cann_kernels PRIVATE cxx_std_17) | ||
|
||
else() | ||
message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.") | ||
endif() | ||
string(TOLOWER ${SOC_TYPE} SOC_VERSION) | ||
set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR}) | ||
set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim") | ||
|
||
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) | ||
if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) | ||
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) | ||
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) | ||
set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) | ||
else() | ||
message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the compiler package is installed.") | ||
endif() | ||
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) | ||
|
||
ascendc_library(ascendc_kernels STATIC | ||
threshold_opencv_kernel.cpp | ||
) | ||
ascendc_library(ascendc_kernels STATIC | ||
${SRC_FILES} | ||
) | ||
#ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP) | ||
|
||
add_library(cann_kernels STATIC ascendc_kernels.cpp) | ||
target_link_libraries(cann_kernels PUBLIC ascendc_kernels) | ||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#include "ascendc_kernels.h" | ||
|
||
#ifdef __CCE_KT_TEST__ | ||
#include "tikicpulib.h" | ||
#else | ||
#include "aclrtlaunch_ascendc_dequantize_q4_0.h" | ||
#include "aclrtlaunch_ascendc_quantize_q4_0.h" | ||
#endif | ||
|
||
|
||
#ifdef __CCE_KT_TEST__ | ||
#include <acl/acl.h> | ||
|
||
// Simulator-only helper (this section is compiled under __CCE_KT_TEST__).
// Allocates `size` bytes with AscendC::GmAlloc, fills them from `ptr` via
// aclrtMemcpy using ACL_MEMCPY_DEVICE_TO_HOST, and returns the new buffer.
// The status returned by aclrtMemcpy is not inspected.
uint8_t* to_gm(uint8_t* ptr, size_t size) {
    void* staging = AscendC::GmAlloc(size);
    aclrtMemcpy(staging, size, ptr, size, ACL_MEMCPY_DEVICE_TO_HOST);
    return (uint8_t*)staging;
}
|
||
void free_gm(uint8_t* ptr) { | ||
aclrtFree(ptr); | ||
} | ||
|
||
extern "C" __global__ __aicore__ void ascendc_dequantize_q4_0(GM_ADDR x, GM_ADDR y, GM_ADDR size); | ||
extern "C" __global__ __aicore__ void ascendc_quantize_q4_0(GM_ADDR x, GM_ADDR y, GM_ADDR size); | ||
#endif | ||
|
||
// Host entry point for the Q4_0 dequantize kernel.
//
// Normal build: forwards all arguments to the generated launcher
// aclrtlaunch_ascendc_dequantize_q4_0.
//
// __CCE_KT_TEST__ build: stages `size`, `x` and `y` into GmAlloc'ed buffers
// with to_gm, runs the kernel through ICPU_RUN_KF with a block count of 1
// (block_dim and stream are not used on this path), then frees the staging
// buffers.
//
// NOTE(review): the x and y staging buffers are both sized with the size_t
// value read from `size`, and nothing here copies the y staging buffer back
// to `y` before it is freed - confirm this is intended.
void cann_dequantize_q4_0(uint32_t block_dim, void* stream, uint8_t* x, uint8_t* y, uint8_t* size) {
#ifndef __CCE_KT_TEST__
    aclrtlaunch_ascendc_dequantize_q4_0(block_dim, stream, x, y, size);
#else
    uint8_t* size_gm = to_gm(size, sizeof(size_t));
    const size_t staged_bytes = *((size_t*)size_gm);
    uint8_t* x_gm = to_gm(x, staged_bytes);
    uint8_t* y_gm = to_gm(y, staged_bytes);

    AscendC::SetKernelMode(KernelMode::AIV_MODE);
    ICPU_RUN_KF(ascendc_dequantize_q4_0, 1, x_gm, y_gm, size_gm);

    free_gm(size_gm);
    free_gm(x_gm);
    free_gm(y_gm);
#endif
}
|
||
// Host entry point for the Q4_0 quantize kernel.
//
// Normal build: forwards all arguments to the generated launcher
// aclrtlaunch_ascendc_quantize_q4_0.
//
// __CCE_KT_TEST__ build: stages `size`, `x` and `y` into GmAlloc'ed buffers
// with to_gm, runs the kernel through ICPU_RUN_KF with a block count of 1
// (block_dim and stream are not used on this path), then frees the staging
// buffers.
//
// NOTE(review): the x and y staging buffers are both sized with the size_t
// value read from `size`, and nothing here copies the y staging buffer back
// to `y` before it is freed - confirm this is intended.
void cann_quantize_q4_0(uint32_t block_dim, void* stream, uint8_t* x, uint8_t* y, uint8_t* size) {
#ifndef __CCE_KT_TEST__
    aclrtlaunch_ascendc_quantize_q4_0(block_dim, stream, x, y, size);
#else
    uint8_t* size_gm = to_gm(size, sizeof(size_t));
    const size_t staged_bytes = *((size_t*)size_gm);
    uint8_t* x_gm = to_gm(x, staged_bytes);
    uint8_t* y_gm = to_gm(y, staged_bytes);

    AscendC::SetKernelMode(KernelMode::AIV_MODE);
    ICPU_RUN_KF(ascendc_quantize_q4_0, 1, x_gm, y_gm, size_gm);

    free_gm(size_gm);
    free_gm(x_gm);
    free_gm(y_gm);
#endif
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
#ifndef ASCENDC_KERNELS_H | ||
#define ASCENDC_KERNELS_H | ||
|
||
|
||
#include <stdint.h> | ||
|
||
void cann_dequantize_q4_0(uint32_t block_dim, void* stream, uint8_t* x, uint8_t* y, uint8_t* size); | ||
void cann_quantize_q4_0(uint32_t block_dim, void* stream, uint8_t* x, uint8_t* y, uint8_t* size); | ||
|
||
#endif //ASCENDC_KERNELS_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#include "dequantize_q4_0.h" | ||
|
||
using namespace AscendC; | ||
|
||
#define BUFFER_NUM 2 | ||
|
||
// Rounds `n` up to the nearest multiple of `align`.
//
// Expects n >= 0 and align > 0. An already aligned `n` is returned unchanged
// (the previous mask expression added a whole extra `align` in that case).
// Division is used instead of a bit mask so that alignments that are not a
// power of two, such as sizeof(block_q4_0), are rounded correctly as well.
__aicore__ inline int32_t align_ceil(int32_t n, int32_t align) {
    return (n + align - 1) / align * align;
}
|
||
// Rounds `n` down to a multiple of `align` by clearing the low bits selected
// by (align - 1). The result is only a true multiple of `align` when `align`
// is a power of two.
__aicore__ inline int32_t align_floor(int32_t n, int32_t align) {
    const int32_t low_bits = align - 1;
    return n & ~low_bits;
}
|
||
|
||
// Number of quantized values held by one Q4_0 block.
#define QK4_0 32

// One Q4_0 block: a 16-bit delta followed by QK4_0 / 2 bytes of packed
// nibbles (two 4-bit quants per byte).
struct block_q4_0 {
    uint16_t d;             // delta
    uint8_t qs[QK4_0 / 2];  // nibbles / quants
};
|
||
class KernelDequantizeQ4_0 | ||
{ | ||
public: | ||
__aicore__ inline KernelDequantizeQ4_0() {} | ||
__aicore__ inline void init(GM_ADDR x, GM_ADDR y, size_t size) { | ||
uint64_t src_block_size = | ||
align_ceil(size / GetBlockNum(), sizeof(block_q4_0)); | ||
uint64_t src_offset = GetBlockIdx() * src_block_size; | ||
src_block_size = | ||
(src_offset + src_block_size > (size / 32 * sizeof(block_q4_0))) | ||
? (size / 32 * sizeof(block_q4_0) - src_offset) | ||
: src_block_size; | ||
uint64_t dst_block_size = | ||
align_ceil(size / GetBlockNum(), QK4_0 * sizeof(float)); | ||
uint64_t dst_offset = GetBlockIdx() * dst_block_size; | ||
dst_block_size = | ||
(dst_offset + dst_block_size > size * sizeof(float)) | ||
? (size * sizeof(float) - dst_offset) | ||
: dst_block_size; | ||
|
||
xGM.SetGlobalBuffer((__gm__ int4b_t*)x + src_offset, src_block_size); | ||
yGM.SetGlobalBuffer((__gm__ float*)y + dst_offset, dst_block_size); | ||
|
||
pipe.InitBuffer(input_queue, BUFFER_NUM, QK4_0 * sizeof(int4b_t)); | ||
// Ascendc do not support cast int4b_t -> float, but support int4b_t -> | ||
// half -> float. | ||
pipe.InitBuffer(cast_queue, BUFFER_NUM, QK4_0 * sizeof(half)); | ||
pipe.InitBuffer(copy_queue, BUFFER_NUM, QK4_0 * sizeof(float)); | ||
pipe.InitBuffer(output_queue, BUFFER_NUM, QK4_0 * sizeof(float)); | ||
} | ||
|
||
__aicore__ inline void copy_in(uint32_t offset) { | ||
LocalTensor<int4b_t> x_local = input_queue.AllocTensor<int4b_t>(); | ||
// offset + 2 to skip scale. | ||
DataCopy(x_local, xGM[offset + 2], QK4_0); | ||
input_queue.EnQue(x_local); | ||
} | ||
|
||
__aicore__ inline void copy_out(uint32_t offset) { | ||
LocalTensor<float> y_local = output_queue.DeQue<float>(); | ||
DataCopy(yGM[offset], y_local, QK4_0); | ||
output_queue.FreeTensor(y_local); | ||
} | ||
|
||
__aicore__ inline void calculate(uint32_t offset, uint32_t len) { | ||
copy_in(offset); | ||
|
||
LocalTensor<int4b_t> x_local = input_queue.DeQue<int4b_t>(); | ||
LocalTensor<half> cast_local = cast_queue.AllocTensor<half>(); | ||
LocalTensor<float> copy_local = copy_queue.AllocTensor<float>(); | ||
LocalTensor<float> y_local = output_queue.AllocTensor<float>(); | ||
|
||
Cast(x_local, cast_local, RoundMode::CAST_NONE, QK4_0); | ||
Cast(cast_local, copy_local, RoundMode::CAST_NONE, QK4_0); | ||
|
||
|
||
} | ||
|
||
__aicore__ inline void run() { | ||
calculate(0, 10); | ||
} | ||
|
||
private: | ||
uint64_t block_size; | ||
uint64_t offset; | ||
|
||
TPipe pipe; | ||
GlobalTensor<int4b_t> xGM; | ||
GlobalTensor<float> yGM; | ||
TQue<QuePosition::VECIN, BUFFER_NUM> input_queue; | ||
TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue; | ||
TQue<QuePosition::VECIN, BUFFER_NUM> cast_queue; | ||
TQue<QuePosition::VECIN, BUFFER_NUM> copy_queue; | ||
}; | ||
|
||
// Kernel entry point for Q4_0 dequantization.
//   x    : global address passed to KernelDequantizeQ4_0::init as input.
//   y    : global address passed to KernelDequantizeQ4_0::init as output.
//   size : global address of a size_t; it is copied into a local variable
//          byte by byte before being handed to init().
extern "C" __global__ __aicore__ void ascendc_dequantize_q4_0(GM_ADDR x, GM_ADDR y, GM_ADDR size)
{
    size_t size_local;
    auto gm_bytes = (__gm__ uint8_t*)size;
    auto local_bytes = (uint8_t*)&size_local;

    for (uint32_t byte_idx = 0; byte_idx < sizeof(size_t) / sizeof(uint8_t); ++byte_idx)
    {
        local_bytes[byte_idx] = gm_bytes[byte_idx];
    }

    KernelDequantizeQ4_0 kernel;
    kernel.init(x, y, size_local);
    kernel.run();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#ifndef DEQUANTIZE_Q4_0_H | ||
#define DEQUANTIZE_Q4_0_H | ||
|
||
#include "ascendc_kernels.h" | ||
#include "kernel_operator.h" | ||
|
||
#endif //DEQUANTIZE_Q4_0_H |
Oops, something went wrong.