Skip to content

Commit 58d8f8e

Browse files
committed
[feat] Adapt to the MetaX GPU (MACA) platform
1 parent e679567 commit 58d8f8e

30 files changed

+653
-22
lines changed

build.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,15 @@ function copy_ops(){
8383
echo -e "BASE and ROCM ops have been copy to fastdeploy"
8484
return
8585
fi
86+
is_maca=`$python -c "import paddle; print(paddle.device.is_compiled_with_custom_device('metax_gpu'))"`
87+
if [ "$is_maca" = "True" ]; then
88+
DEVICE_TYPE="gpu"
89+
mkdir -p ../fastdeploy/model_executor/ops/base
90+
cp -r ./${OPS_TMP_DIR_BASE}/${WHEEL_BASE_NAME}/* ../fastdeploy/model_executor/ops/base
91+
cp -r ./${OPS_TMP_DIR}/${WHEEL_NAME}/* ../fastdeploy/model_executor/ops/gpu
92+
echo -e "MACA ops have been copy to fastdeploy"
93+
return
94+
fi
8695
mkdir -p ../fastdeploy/model_executor/ops/base
8796
is_cuda=`$python -c "import paddle; print(paddle.is_compiled_with_cuda())"`
8897
if [ "$is_cuda" = "True" ]; then

custom_ops/gpu_ops/get_padding_offset.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ std::vector<paddle::Tensor> GetPaddingOffset(const paddle::Tensor &input_ids,
6060
const paddle::Tensor &cum_offsets,
6161
const paddle::Tensor &token_num,
6262
const paddle::Tensor &seq_len) {
63-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
63+
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU)
6464
auto dev_ctx = static_cast<const phi::CustomContext*>(paddle::experimental::DeviceContextPool::Instance().Get(input_ids.place()));
6565
auto cu_stream = dev_ctx->stream();
6666
#else

custom_ops/gpu_ops/helper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ static void PrintMatrix3(const T *mat_d, int num, std::string name) {
509509
}
510510

511511
#ifndef PADDLE_WITH_HIP
512+
#ifndef PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU
512513
__forceinline__ __device__ uint32_t ld_flag_acquire(uint32_t *flag_addr,
513514
int mode = 0) {
514515
uint32_t flag;
@@ -541,6 +542,7 @@ __forceinline__ __device__ void st_flag_release(uint32_t *flag_addr,
541542
"l"(flag_addr));
542543
}
543544
}
545+
#endif
544546

545547
inline int get_cuda_max_shared_memory_per_block_opt_in(int const device) {
546548
int max_shared_mem_per_block_opt_in = 0;

custom_ops/gpu_ops/rebuild_padding.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ std::vector<paddle::Tensor> rebuild_padding(
9191
typedef typename traits_::DataType DataType_;
9292
typedef typename traits_::data_t data_t;
9393

94-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
94+
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU)
9595
auto dev_ctx = static_cast<const phi::CustomContext*>(paddle::experimental::DeviceContextPool::Instance().Get(tmp_out.place()));
9696
auto cu_stream = dev_ctx->stream();
9797
#else

custom_ops/gpu_ops/set_value_by_flags.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ void SetValueByFlagsAndIdx(const paddle::Tensor &pre_ids_all,
5252
const paddle::Tensor &seq_lens_decoder,
5353
const paddle::Tensor &step_idx,
5454
const paddle::Tensor &stop_flags) {
55-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
55+
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU)
5656
auto dev_ctx = static_cast<const phi::CustomContext*>(paddle::experimental::DeviceContextPool::Instance().Get(stop_flags.place()));
5757
auto cu_stream = dev_ctx->stream();
5858
#else

custom_ops/gpu_ops/step.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ void StepPaddle(const paddle::Tensor &stop_flags,
323323
const paddle::Tensor &first_token_ids,
324324
const int block_size,
325325
const int encoder_decoder_block_num) {
326-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
326+
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU)
327327
auto dev_ctx = static_cast<const phi::CustomContext*>(paddle::experimental::DeviceContextPool::Instance().Get(seq_lens_this_time.place()));
328328
auto cu_stream = dev_ctx->stream();
329329
#else

custom_ops/gpu_ops/stop_generation_multi_ends.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ void GetStopFlagsMulti(const paddle::Tensor &topk_ids,
7474
}
7575
}
7676

77-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
77+
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU)
7878
auto dev_ctx = static_cast<const phi::CustomContext*>(paddle::experimental::DeviceContextPool::Instance().Get(topk_ids.place()));
7979
auto cu_stream = dev_ctx->stream();
8080
#else

custom_ops/gpu_ops/stop_generation_multi_stop_seqs.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ void GetStopFlagsMultiSeqs(const paddle::Tensor &topk_ids,
8989
PD_CHECK(topk_ids.dtype() == paddle::DataType::INT64);
9090
PD_CHECK(stop_flags.dtype() == paddle::DataType::BOOL);
9191

92-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
92+
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU)
9393
auto dev_ctx = static_cast<const phi::CustomContext*>(paddle::experimental::DeviceContextPool::Instance().Get(topk_ids.place()));
9494
auto cu_stream = dev_ctx->stream();
9595
#else

custom_ops/gpu_ops/token_penalty_multi_scores.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ void token_penalty_multi_scores_kernel(const paddle::Tensor &pre_ids,
156156
typedef PDTraits<D> traits_;
157157
typedef typename traits_::DataType DataType_;
158158
typedef typename traits_::data_t data_t;
159-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
159+
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU)
160160
auto dev_ctx = static_cast<const phi::CustomContext*>(paddle::experimental::DeviceContextPool::Instance().Get(logits.place()));
161161
auto cu_stream = dev_ctx->stream();
162162
#else

custom_ops/gpu_ops/update_inputs.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ void UpdateInputes(const paddle::Tensor &stop_flags,
7575
const paddle::Tensor &stop_nums,
7676
const paddle::Tensor &next_tokens,
7777
const paddle::Tensor &is_block_step) {
78-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
78+
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU)
7979
auto dev_ctx = static_cast<const phi::CustomContext*>(paddle::experimental::DeviceContextPool::Instance().Get(input_ids.place()));
8080
auto cu_stream = dev_ctx->stream();
8181
#else

0 commit comments

Comments
 (0)