rename bcast to acl_tensor
hipudding committed Apr 7, 2024
1 parent 339a3fb commit 2deb900
Showing 8 changed files with 69 additions and 79 deletions.
ggml-cann.cpp (19 changes: 9 additions & 10 deletions)
@@ -6,17 +6,9 @@
 #include <mutex>

 #include "ggml-backend-impl.h"
+#include "ggml-cann/acl_ops.h"
 #include "ggml-cann/aclnn_ops.h"
 #include "ggml-cann/common.h"
-#include "ggml-cann/acl_ops.h"
-
-struct AclLifeCycle {
-    AclLifeCycle() { ACL_CHECK(aclInit(nullptr)); }
-
-    ~AclLifeCycle() { ACL_CHECK(aclFinalize()); }
-};
-
-AclLifeCycle acl_life_cycle;

 [[noreturn]] void ggml_cann_error(const char* stmt, const char* func,
                                   const char* file, int line, const char* msg) {
@@ -477,9 +469,15 @@ GGML_CALL static void ggml_backend_cann_free(ggml_backend_t backend) {
     ggml_backend_cann_context* cann_ctx =
         (ggml_backend_cann_context*)backend->context;
     ACL_CHECK(aclrtSynchronizeDevice());
+    cann_ctx->free_buffers();
     ACL_CHECK(aclrtResetDevice(cann_ctx->device));
     delete cann_ctx;
     delete backend;
+
+    // Finalize when last device freed.
+    if (cann_ctx->device == ggml_backend_cann_get_device_count() - 1) {
+        ACL_CHECK(aclFinalize());
+    }
 }

@@ -678,7 +676,7 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
         case GGML_OP_DIAG_MASK_INF:
             return false;
         case GGML_OP_SOFT_MAX:
-                return true;
+            return true;
         case GGML_OP_ROPE:
         case GGML_OP_ALIBI:
         case GGML_OP_IM2COL:
@@ -844,6 +842,7 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cann_init(const char* params,
 extern "C" GGML_CALL int ggml_backend_cann_reg_devices();

 GGML_CALL int ggml_backend_cann_reg_devices() {
+    ACL_CHECK(aclInit(nullptr));
     uint32_t device_count = ggml_backend_cann_get_device_count();
     // initialization
     for (uint32_t i = 0; i < device_count; i++) {
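Note on the hunks above: ACL runtime setup is no longer tied to a global static. aclInit() now runs once during device registration, and aclFinalize() runs when the backend for the highest-numbered device is freed. A minimal sketch of the resulting lifecycle (illustrative, assuming a single CANN device so device 0 is also the last one):

    // aclInit(nullptr) happens inside device registration.
    ggml_backend_cann_reg_devices();
    ggml_backend_t backend = ggml_backend_cann_init(0);
    // ... build and compute graphs on the backend ...
    // Frees pooled buffers and resets the device; since 0 == device_count - 1
    // here, this also calls aclFinalize().
    ggml_backend_free(backend);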
ggml-cann/acl_ops.cpp (22 changes: 9 additions & 13 deletions)
@@ -15,10 +15,6 @@ OpCaller::~OpCaller() {
     for (aclDataBuffer* buffer : output_buffers) {
         aclDestroyDataBuffer(buffer);
     }
-    // TODO: may free before use.
-    for (void* ptr : ptrs) {
-        aclrtFree(ptr);
-    }
     aclopDestroyAttr(attrs);
 }

@@ -100,20 +96,21 @@ void ggml_cann_cont(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     OpCaller op;
     op.name("ViewCopy")
         .input_no_contiguous(dst, "dst")
-        .input(dst->ne, ACL_INT64, 1, size_stride_dim, "dst_size", ctx.stream())
-        .input(dst_stride, ACL_INT64, 1, size_stride_dim, "dst_stride",
+        .input(ctx, dst->ne, ACL_INT64, 1, size_stride_dim, "dst_size",
+               ctx.stream())
+        .input(ctx, dst_stride, ACL_INT64, 1, size_stride_dim, "dst_stride",
                ctx.stream())
-        .input(storage_offset, ACL_INT64, 1, storage_offset_dim,
+        .input(ctx, storage_offset, ACL_INT64, 1, storage_offset_dim,
                "dst_storage_offset", ctx.stream())
         .input_no_contiguous(src, "src")
-        .input(src->ne, ACL_INT64, 1, size_stride_dim, "src_size", ctx.stream())
-        .input(src_stride, ACL_INT64, 1, size_stride_dim, "src_stride",
+        .input(ctx, src->ne, ACL_INT64, 1, size_stride_dim, "src_size",
+               ctx.stream())
+        .input(ctx, src_stride, ACL_INT64, 1, size_stride_dim, "src_stride",
                ctx.stream())
-        .input(storage_offset, ACL_INT64, 1, storage_offset_dim,
+        .input(ctx, storage_offset, ACL_INT64, 1, storage_offset_dim,
                "src_storage_offset", ctx.stream())
         .output(dst, "dst")
         .run(ctx.stream());
-    //aclrtSynchronizeStream(ctx.stream());
 }

@@ -125,8 +122,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     OpCaller op;
     op.name("Pad")
         .input(src, "x")
-        .input(paddings, ACL_INT64, 2, dim, "paddings", ctx.stream())
+        .input(ctx, paddings, ACL_INT64, 2, dim, "paddings", ctx.stream())
         .output(dst, "y")
         .run(ctx.stream());
-    //aclrtSynchronizeStream(ctx.stream());
 }
ggml-cann/acl_ops.h (9 changes: 4 additions & 5 deletions)
@@ -7,7 +7,7 @@
 #include <string>
 #include <vector>

-#include "bcast.h"
+#include "acl_tensor.h"
 #include "common.h"

 struct OpCaller {
@@ -38,17 +38,16 @@ struct OpCaller {
     OpCaller& attr(float value, const char* name);

     template <typename T>
-    OpCaller& input(T* values, aclDataType dtype, size_t dims, int64_t* dim,
+    OpCaller& input(ggml_backend_cann_context& ctx, T* values,
+                    aclDataType dtype, size_t dims, int64_t* dim,
                     const char* name, aclrtStream stream = nullptr) {
-        void* device_ptr = nullptr;
         size_t n_elem = 1;
         for (size_t i = 0; i < dims; i++) {
             n_elem *= dim[i];
         }

         size_t n_bytes = n_elem * sizeof(T);
-        ACL_CHECK(aclrtMalloc(&device_ptr, n_bytes, ACL_MEM_MALLOC_HUGE_FIRST));
-        ptrs.push_back(device_ptr);
+        void* device_ptr = ctx.alloc_buffer(n_bytes);
         if (stream == nullptr) {
             ACL_CHECK(aclrtMemcpy(device_ptr, n_bytes, values, n_bytes,
                                   ACL_MEMCPY_HOST_TO_DEVICE));
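The input() change above, together with the destructor hunk in acl_ops.cpp, moves ownership of staging buffers from OpCaller (a raw aclrtMalloc freed in ~OpCaller, per the removed "may free before use" TODO) to the backend context's pool. A hypothetical call site for the new signature, mirroring the Pad op in this commit (src, dst, and ctx assumed in scope; the padding values are made up):

    // The paddings array lives on the host; input() computes n_bytes from
    // dim[], copies the data into a ctx-owned pool buffer, and binds it as
    // the op input named "paddings".
    int64_t paddings[] = {0, 0, 0, 1, 0, 1, 0, 0};  // example values
    int64_t dim[] = {4, 2};                         // 4 x 2 paddings matrix
    OpCaller op;
    op.name("Pad")
        .input(src, "x")
        .input(ctx, paddings, ACL_INT64, 2, dim, "paddings", ctx.stream())
        .output(dst, "y")
        .run(ctx.stream());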
ggml-cann/bcast.cpp → ggml-cann/acl_tensor.cpp (30 changes: 14 additions & 16 deletions)
@@ -1,4 +1,5 @@
-#include "bcast.h"
+#include "acl_tensor.h"

+#include <algorithm>
 #include <cstring>

@@ -32,7 +33,8 @@ aclDataType type_mapping(ggml_type type) {
  * changed to satisfy the broadcast. @sa: get_bcast_shape.
  */
 aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* bcast_ne,
-                             size_t* bcast_nb, int64_t bcast_dims, aclFormat format) {
+                             size_t* bcast_nb, int64_t bcast_dims,
+                             aclFormat format) {
     size_t size = ggml_nbytes(tensor);
     void* deviceAddr = nullptr;

@@ -74,9 +76,9 @@
     return acl_tensor;
 }

-aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype, size_t type_size, int64_t* ne,
-                             size_t* nb, int64_t dims, aclFormat format) {
-
+aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
+                             size_t type_size, int64_t* ne, size_t* nb,
+                             int64_t dims, aclFormat format) {
     int64_t tmp_ne[GGML_MAX_DIMS * 2];
     int64_t tmp_stride[GGML_MAX_DIMS * 2];

@@ -88,9 +90,8 @@
     std::reverse(tmp_ne, tmp_ne + dims);
     std::reverse(tmp_stride, tmp_stride + dims);

-    aclTensor* acl_tensor =
-        aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, 0,
-                        format, tmp_ne, dims, data_ptr);
+    aclTensor* acl_tensor = aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, 0,
+                                            format, tmp_ne, dims, data_ptr);

     return acl_tensor;
 }
@@ -132,8 +133,7 @@
  */
 int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
                         int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
-                        size_t* bcast_nb_src0,
-                        size_t* bcast_nb_src1) {
+                        size_t* bcast_nb_src0, size_t* bcast_nb_src1) {
     GGML_ASSERT(ggml_can_repeat(src1, src0));
     int bcast_dim_cnt = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
@@ -147,12 +147,10 @@ int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
             // Need to add an extra dim.
             bcast_ne_src0[bcast_dim_cnt] = nr;
             bcast_ne_src1[bcast_dim_cnt] = 1;
-            bcast_nb_src0[bcast_dim_cnt] =
-                bcast_nb_src0[bcast_dim_cnt - 1] *
-                bcast_ne_src0[bcast_dim_cnt - 1];
-            bcast_nb_src1[bcast_dim_cnt] =
-                bcast_nb_src1[bcast_dim_cnt - 1] *
-                bcast_ne_src1[bcast_dim_cnt - 1];
+            bcast_nb_src0[bcast_dim_cnt] = bcast_nb_src0[bcast_dim_cnt - 1] *
+                                           bcast_ne_src0[bcast_dim_cnt - 1];
+            bcast_nb_src1[bcast_dim_cnt] = bcast_nb_src1[bcast_dim_cnt - 1] *
+                                           bcast_ne_src1[bcast_dim_cnt - 1];
             bcast_dim_cnt++;
         }
     }
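For intuition about the stride arithmetic in the last hunk: when src0's extent in some dim is nr times src1's, an extra dim of extent nr is emitted for src0 and extent 1 for src1, with strides derived from the previous dim. A standalone toy reproducing just that visible step (made-up values; the collapsed parts of get_bcast_shape are not modeled):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
        // Pretend dim 0 was already emitted: extent 2, stride 4 bytes (fp32).
        int64_t bcast_ne_src0[8] = {2}, bcast_ne_src1[8] = {2};
        size_t bcast_nb_src0[8] = {4}, bcast_nb_src1[8] = {4};
        int bcast_dim_cnt = 1;

        int64_t nr = 3;  // src0->ne[i] / src1->ne[i], e.g. 3 / 1
        bcast_ne_src0[bcast_dim_cnt] = nr;  // src0 keeps its real extent
        bcast_ne_src1[bcast_dim_cnt] = 1;   // src1 is stepped in place
        bcast_nb_src0[bcast_dim_cnt] = bcast_nb_src0[bcast_dim_cnt - 1] *
                                       bcast_ne_src0[bcast_dim_cnt - 1];
        bcast_nb_src1[bcast_dim_cnt] = bcast_nb_src1[bcast_dim_cnt - 1] *
                                       bcast_ne_src1[bcast_dim_cnt - 1];

        printf("src0: ne=%lld nb=%zu\n", (long long)bcast_ne_src0[1],
               bcast_nb_src0[1]);  // src0: ne=3 nb=8
        printf("src1: ne=%lld nb=%zu\n", (long long)bcast_ne_src1[1],
               bcast_nb_src1[1]);  // src1: ne=1 nb=8
        return 0;
    }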
ggml-cann/bcast.h → ggml-cann/acl_tensor.h (13 changes: 6 additions & 7 deletions)
@@ -1,5 +1,5 @@
-#ifndef CANN_BCAST_H
-#define CANN_BCAST_H
+#ifndef CANN_ACL_TENSOR_H
+#define CANN_ACL_TENSOR_H

 #include <aclnn/aclnn_base.h>
@@ -10,8 +10,7 @@ aclDataType type_mapping(ggml_type type);

 aclTensor* create_acl_tensor(const ggml_tensor* tensor,
                              int64_t* bcast_ne = nullptr,
-                             size_t* bcast_nb = nullptr,
-                             int64_t bcast_dims = 0,
+                             size_t* bcast_nb = nullptr, int64_t bcast_dims = 0,
                              aclFormat format = ACL_FORMAT_ND);

 aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
@@ -28,12 +27,12 @@ int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,

 #define BCAST_SHAPE(src0, src1) \
     int64_t bcast_ne_##src0[GGML_MAX_DIMS * 2]; \
     int64_t bcast_ne_##src1[GGML_MAX_DIMS * 2]; \
-    size_t bcast_nb_##src0[GGML_MAX_DIMS * 2]; \
-    size_t bcast_nb_##src1[GGML_MAX_DIMS * 2]; \
+    size_t bcast_nb_##src0[GGML_MAX_DIMS * 2];  \
+    size_t bcast_nb_##src1[GGML_MAX_DIMS * 2];  \
     int64_t bcast_dims = \
         get_bcast_shape(src0, src1, bcast_ne_##src0, bcast_ne_##src1, \
                         bcast_nb_##src0, bcast_nb_##src1);

 #define BCAST_PARAM(src) bcast_ne_##src, bcast_nb_##src, bcast_dims

-#endif // CANN_BCAST_H
+#endif // CANN_ACL_TENSOR_H
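For reference, how the two macros are meant to be used, sketched from the declarations above (the op body is hypothetical, but the call shape matches the header):

    // Hypothetical element-wise op: expand src0/src1 into broadcast-
    // compatible ne/nb arrays, then build aclTensors from them.
    void example_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
        ggml_tensor* src0 = dst->src[0];
        ggml_tensor* src1 = dst->src[1];

        // Declares bcast_ne_src0/src1, bcast_nb_src0/src1 and bcast_dims.
        BCAST_SHAPE(src0, src1)
        aclTensor* acl_src0 = create_acl_tensor(src0, BCAST_PARAM(src0));
        aclTensor* acl_src1 = create_acl_tensor(src1, BCAST_PARAM(src1));
        // ... launch the aclnn kernel on ctx.stream(), then destroy both ...
    }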
ggml-cann/aclnn_ops.cpp (45 changes: 24 additions & 21 deletions)
@@ -1,10 +1,10 @@
 #include "aclnn_ops.h"

-#include <aclnnop/aclnn_layer_norm.h>
 #include <aclnnop/aclnn_cast.h>
 #include <aclnnop/aclnn_group_norm.h>
-#include <aclnnop/aclnn_softmax.h>
+#include <aclnnop/aclnn_layer_norm.h>
 #include <aclnnop/aclnn_repeat.h>
+#include <aclnnop/aclnn_softmax.h>

 #include <cmath>
 #include <cstring>
@@ -25,13 +25,14 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     int64_t repeatsArray[] = {dst->ne[3] / src->ne[3], dst->ne[2] / src->ne[2],
                               dst->ne[1] / src->ne[1], dst->ne[0] / src->ne[0]};

-    aclIntArray *repeats = aclCreateIntArray(repeatsArray, GGML_MAX_DIMS);
+    aclIntArray* repeats = aclCreateIntArray(repeatsArray, GGML_MAX_DIMS);

     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
     void* workspaceAddr = nullptr;

-    ACL_CHECK(aclnnRepeatGetWorkspaceSize(acl_src, repeats, acl_dst, &workspaceSize, &executor));
+    ACL_CHECK(aclnnRepeatGetWorkspaceSize(acl_src, repeats, acl_dst,
+                                          &workspaceSize, &executor));

     if (workspaceSize > 0) {
         workspaceAddr = ctx.alloc_buffer(workspaceSize);
@@ -42,7 +43,6 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclDestroyIntArray(repeats));
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
-
 }

 void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
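The repeat hunks show the calling convention used throughout this file: every aclnn operator is two-phase. A sketch of the pattern with names from the diff (the final launch line is collapsed in this view, so its exact form is assumed from the aclnn convention):

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor = nullptr;
    void* workspaceAddr = nullptr;

    // Phase 1: size the scratch space and build an executor for this op.
    ACL_CHECK(aclnnRepeatGetWorkspaceSize(acl_src, repeats, acl_dst,
                                          &workspaceSize, &executor));
    if (workspaceSize > 0) {
        // Pool-backed: released in bulk by ctx.free_buffers(), not per call.
        workspaceAddr = ctx.alloc_buffer(workspaceSize);
    }
    // Phase 2: launch asynchronously on the context's stream.
    ACL_CHECK(aclnnRepeat(workspaceAddr, workspaceSize, executor, ctx.stream()));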
@@ -140,7 +140,7 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclnnCatGetWorkspaceSize(tensorList, 1, acl_dst, &workspaceSize,
                                        &executor));
     if (workspaceSize > 0) {
-      workspaceAddr = ctx.alloc_buffer(workspaceSize);
+        workspaceAddr = ctx.alloc_buffer(workspaceSize);
     }

     aclrtStream main_stream = ctx.stream();
@@ -262,7 +262,8 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclTensor* acl_src = create_acl_tensor(src);
     aclTensor* acl_dst = create_acl_tensor(dst);
-    void* buffer = ctx.alloc_buffer(ggml_nbytes(dst) / ggml_type_size(dst->type) * sizeof(int64_t));
+    void* buffer = ctx.alloc_buffer(
+        ggml_nbytes(dst) / ggml_type_size(dst->type) * sizeof(int64_t));
     aclTensor* tmp_tensor =
         create_acl_tensor(buffer, ACL_INT64, ggml_type_size(dst->type), dst->ne,
                           dst->nb, GGML_MAX_DIMS);
@@ -311,8 +312,8 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     std::vector<int64_t> normData = {dst->ne[0]};
     aclIntArray* norm = aclCreateIntArray(normData.data(), normData.size());
-    ACL_CHECK(aclnnLayerNormGetWorkspaceSize(acl_src, norm, nullptr, nullptr, eps,
-                                             acl_dst, nullptr, nullptr,
+    ACL_CHECK(aclnnLayerNormGetWorkspaceSize(acl_src, norm, nullptr, nullptr,
+                                             eps, acl_dst, nullptr, nullptr,
                                              &workspaceSize, &executor));

     if (workspaceSize > 0) {
@@ -381,33 +382,37 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclTensor* acl_src0 = create_acl_tensor(src0);
     aclTensor* acl_dst = create_acl_tensor(dst);

-    float scale    = 1.0f;
+    float scale = 1.0f;
     float max_bias = 0.0f;

-    memcpy(&scale, (float *) dst->op_params + 0, sizeof(float));
-    memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
+    memcpy(&scale, (float*)dst->op_params + 0, sizeof(float));
+    memcpy(&max_bias, (float*)dst->op_params + 1, sizeof(float));

     aclScalar* acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
-    aclScalar* acl_max_bias = aclCreateScalar(&max_bias, aclDataType::ACL_FLOAT);
+    aclScalar* acl_max_bias =
+        aclCreateScalar(&max_bias, aclDataType::ACL_FLOAT);

     size_t n_bytes = ggml_nbytes(src0);
-    void *buffer = ctx.alloc_buffer(n_bytes);
-    aclTensor* temp_tensor = create_acl_tensor(buffer, ACL_FLOAT, ggml_type_size(src0->type), src0->ne, src0->nb, GGML_MAX_DIMS);
+    void* buffer = ctx.alloc_buffer(n_bytes);
+    aclTensor* temp_tensor =
+        create_acl_tensor(buffer, ACL_FLOAT, ggml_type_size(src0->type),
+                          src0->ne, src0->nb, GGML_MAX_DIMS);

     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
     void* workspaceAddr = nullptr;

-    aclnnMulsGetWorkspaceSize(acl_src0, acl_scale, temp_tensor, &workspaceSize, &executor);
+    aclnnMulsGetWorkspaceSize(acl_src0, acl_scale, temp_tensor, &workspaceSize,
+                              &executor);
     if (workspaceSize > 0) {
         workspaceAddr = ctx.alloc_buffer(workspaceSize);
     }

     aclrtStream stream = ctx.stream();
     aclnnMuls(workspaceAddr, workspaceSize, executor, stream);

-    ACL_CHECK(aclnnSoftmaxGetWorkspaceSize(
-        temp_tensor, 3, acl_dst, &workspaceSize, &executor));
+    ACL_CHECK(aclnnSoftmaxGetWorkspaceSize(temp_tensor, 3, acl_dst,
+                                           &workspaceSize, &executor));

     if (workspaceSize > 0) {
         workspaceAddr = ctx.alloc_buffer(workspaceSize);
@@ -419,6 +424,4 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }

-void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-
-}
+void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {}
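In the softmax hunk, the scale read from op_params is applied with a separate aclnnMuls into a pooled temp tensor before aclnnSoftmax runs over dim 3; max_bias is parsed but not consumed in the lines shown. A standalone CPU reference for sanity-checking the device result (illustrative values):

    #include <cmath>
    #include <cstdio>

    int main() {
        const float scale = 0.125f;  // e.g. 1/sqrt(d_k) for attention
        float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
        float y[4], sum = 0.0f;
        for (int i = 0; i < 4; i++) sum += y[i] = std::exp(scale * x[i]);
        for (int i = 0; i < 4; i++) y[i] /= sum;  // y = softmax(scale * x)
        for (int i = 0; i < 4; i++) printf("%.4f ", y[i]);  // sums to 1
        printf("\n");
        return 0;
    }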
ggml-cann/aclnn_ops.h (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,7 @@
 #include <aclnnop/aclnn_silu.h>
 #include <aclnnop/aclnn_tanh.h>

-#include "bcast.h"
+#include "acl_tensor.h"
 #include "common.h"

 void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
ggml-cann/common.h (8 changes: 2 additions & 6 deletions)
@@ -81,9 +81,7 @@ struct ggml_backend_cann_context {
         return buffer;
     }

-    void* alloc_buffer(size_t size) {
-        return alloc_buffer(size, 0);
-    }
+    void* alloc_buffer(size_t size) { return alloc_buffer(size, 0); }

     void free_buffers() {
         for (int i = 0; i < GGML_CANN_MAX_STREAMS; i++) {
@@ -107,6 +105,4 @@ struct ggml_backend_cann_context {
     aclrtStream stream() { return stream(0); }
 };

-
-
-#endif //CANN_COMMON_H
+#endif // CANN_COMMON_H
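common.h exposes the pooled allocator the rest of this commit leans on: alloc_buffer(size) defaults to stream 0, and free_buffers() (now called from ggml_backend_cann_free) releases everything at once. A toy illustration of that ownership model, with std::malloc standing in for device allocation (hypothetical type, not the collapsed implementation):

    #include <cstdlib>
    #include <vector>

    // Stand-in for the per-context pool: allocations are owned by the pool
    // and released in bulk, mirroring alloc_buffer()/free_buffers().
    struct toy_buffer_pool {
        std::vector<void*> ptrs;
        void* alloc_buffer(size_t size) {
            void* p = std::malloc(size);  // aclrtMalloc in the real backend
            ptrs.push_back(p);
            return p;
        }
        void free_buffers() {
            for (void* p : ptrs) std::free(p);
            ptrs.clear();
        }
    };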
