Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions include/infiniop/ops/diff.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#ifndef __INFINIOP_DIFF_API_H__
#define __INFINIOP_DIFF_API_H__

#include "../operator_descriptor.h"

// Opaque handle type for a Diff operator descriptor.
typedef struct InfiniopDescriptor *infiniopDiffDescriptor_t;

// Creates a Diff operator descriptor and returns it through desc_ptr.
//   handle   - library handle the descriptor is created with
//   desc_ptr - receives the new descriptor
//   y        - output tensor descriptor
//   x        - input tensor descriptor
//   dim      - axis to difference along
//   n        - order of the difference
// NOTE(review): the CPU backend added alongside this header wraps a negative
// dim by the input rank, rejects n <= 0, requires x.shape[dim] > n, and
// requires y's shape to equal x's shape with shape[dim] reduced by n.
// Confirm that every other backend enforces the same contract.
__C __export infiniStatus_t infiniopCreateDiffDescriptor(infiniopHandle_t handle,
infiniopDiffDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
int dim,
int n);

// Reports through size the workspace the descriptor needs.
// NOTE(review): presumably in bytes; the CPU descriptor's workspaceSize()
// returns 0 — confirm units against the dispatch code.
__C __export infiniStatus_t infiniopGetDiffWorkspaceSize(infiniopDiffDescriptor_t desc, size_t *size);

// Runs the Diff operator described by desc, reading from x and writing to y.
// NOTE(review): presumably dispatches to the backend's calculate(); the CPU
// implementation does not use workspace, workspace_size or stream.
__C __export infiniStatus_t infiniopDiff(infiniopDiffDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

// Destroys a descriptor created by infiniopCreateDiffDescriptor.
__C __export infiniStatus_t infiniopDestroyDiffDescriptor(infiniopDiffDescriptor_t desc);

#endif
24 changes: 24 additions & 0 deletions include/infiniop/ops/digamma.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_DIGAMMA_API_H__
#define __INFINIOP_DIGAMMA_API_H__

#include "../operator_descriptor.h"

// Opaque handle type for a Digamma operator descriptor.
typedef struct InfiniopDescriptor *infiniopDigammaDescriptor_t;

// Creates a Digamma operator descriptor and returns it through desc_ptr.
//   handle   - library handle the descriptor is created with
//   desc_ptr - receives the new descriptor
//   y        - output tensor descriptor
//   x        - input tensor descriptor
// NOTE(review): presumably an element-wise digamma of x written to y; the
// implementation is not visible from this header — confirm shape and dtype
// requirements against the backends.
__C __export infiniStatus_t infiniopCreateDigammaDescriptor(infiniopHandle_t handle,
infiniopDigammaDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);

// Reports through size the workspace the descriptor needs.
// NOTE(review): presumably in bytes — confirm.
__C __export infiniStatus_t infiniopGetDigammaWorkspaceSize(infiniopDigammaDescriptor_t desc, size_t *size);

// Runs the Digamma operator described by desc, reading from x and writing to y.
// NOTE(review): stream is presumably a backend-specific execution stream and
// workspace a scratch buffer of workspace_size — confirm.
__C __export infiniStatus_t infiniopDigamma(infiniopDigammaDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

// Destroys a descriptor created by infiniopCreateDigammaDescriptor.
__C __export infiniStatus_t infiniopDestroyDigammaDescriptor(infiniopDigammaDescriptor_t desc);

#endif
27 changes: 27 additions & 0 deletions include/infiniop/ops/dist.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef __INFINIOP_DIST_API_H__
#define __INFINIOP_DIST_API_H__

#include "../operator_descriptor.h"

// Opaque handle type for a Dist operator descriptor.
typedef struct InfiniopDescriptor *infiniopDistDescriptor_t;

// Creates a Dist operator descriptor and returns it through desc_ptr.
//   handle   - library handle the descriptor is created with
//   desc_ptr - receives the new descriptor
//   y        - output tensor descriptor
//   x1, x2   - descriptors of the two input tensors
//   p        - scalar parameter of the operator
// NOTE(review): presumably y is the p-norm distance between x1 and x2 with p
// as the norm order; the implementation is not visible from this header —
// confirm, including which values of p are accepted.
__C __export infiniStatus_t infiniopCreateDistDescriptor(infiniopHandle_t handle,
infiniopDistDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x1,
infiniopTensorDescriptor_t x2,
double p);

// Reports through size the workspace the descriptor needs.
// NOTE(review): presumably in bytes — confirm.
__C __export infiniStatus_t infiniopGetDistWorkspaceSize(infiniopDistDescriptor_t desc, size_t *size);

// Runs the Dist operator described by desc, reading from x1 and x2 and
// writing to y.
// NOTE(review): stream is presumably a backend-specific execution stream and
// workspace a scratch buffer of workspace_size — confirm.
__C __export infiniStatus_t infiniopDist(infiniopDistDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x1,
const void *x2,
void *stream);

// Destroys a descriptor created by infiniopCreateDistDescriptor.
__C __export infiniStatus_t infiniopDestroyDistDescriptor(infiniopDistDescriptor_t desc);

#endif
24 changes: 24 additions & 0 deletions include/infiniop/ops/logdet.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_LOGDET_API_H__
#define __INFINIOP_LOGDET_API_H__

#include "../operator_descriptor.h"

// Opaque handle type for a Logdet operator descriptor.
typedef struct InfiniopDescriptor *infiniopLogdetDescriptor_t;

// Creates a Logdet operator descriptor and returns it through desc_ptr.
//   handle   - library handle the descriptor is created with
//   desc_ptr - receives the new descriptor
//   y        - output tensor descriptor
//   x        - input tensor descriptor
// NOTE(review): presumably y is the log-determinant of the square matrix (or
// batch of matrices) in x; the implementation is not visible from this
// header — confirm shape requirements and the result for non-positive
// determinants.
__C __export infiniStatus_t infiniopCreateLogdetDescriptor(infiniopHandle_t handle,
infiniopLogdetDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);

// Reports through size the workspace the descriptor needs.
// NOTE(review): presumably in bytes — confirm.
__C __export infiniStatus_t infiniopGetLogdetWorkspaceSize(infiniopLogdetDescriptor_t desc, size_t *size);

// Runs the Logdet operator described by desc, reading from x and writing to y.
// NOTE(review): stream is presumably a backend-specific execution stream and
// workspace a scratch buffer of workspace_size — confirm.
__C __export infiniStatus_t infiniopLogdet(infiniopLogdetDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

// Destroys a descriptor created by infiniopCreateLogdetDescriptor.
__C __export infiniStatus_t infiniopDestroyLogdetDescriptor(infiniopLogdetDescriptor_t desc);

#endif
28 changes: 28 additions & 0 deletions include/infiniop/ops/pad.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef __INFINIOP_PAD_API_H__
#define __INFINIOP_PAD_API_H__

#include "../operator_descriptor.h"

// Opaque handle type for a Pad operator descriptor.
typedef struct InfiniopDescriptor *infiniopPadDescriptor_t;

// Creates a Pad operator descriptor and returns it through desc_ptr.
//   handle   - library handle the descriptor is created with
//   desc_ptr - receives the new descriptor
//   y        - output tensor descriptor
//   x        - input tensor descriptor
//   pad      - untyped pointer to the padding specification
//   pad_size - size associated with the pad buffer
//   mode     - padding mode, passed as a C string
//   value    - scalar parameter of the operator
// NOTE(review): the element type and layout of pad, whether pad_size counts
// bytes or elements, the accepted mode strings, and whether value is only
// used as the fill for a constant mode are not visible from this header —
// TODO confirm against the implementation and document them here.
__C __export infiniStatus_t infiniopCreatePadDescriptor(infiniopHandle_t handle,
infiniopPadDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
void *pad,
size_t pad_size,
const char *mode,
double value);

// Reports through size the workspace the descriptor needs.
// NOTE(review): presumably in bytes — confirm.
__C __export infiniStatus_t infiniopGetPadWorkspaceSize(infiniopPadDescriptor_t desc, size_t *size);

// Runs the Pad operator described by desc, reading from x and writing to y.
// NOTE(review): stream is presumably a backend-specific execution stream and
// workspace a scratch buffer of workspace_size — confirm.
__C __export infiniStatus_t infiniopPad(infiniopPadDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

// Destroys a descriptor created by infiniopCreatePadDescriptor.
__C __export infiniStatus_t infiniopDestroyPadDescriptor(infiniopPadDescriptor_t desc);

#endif
157 changes: 157 additions & 0 deletions src/infiniop/ops/diff/cpu/diff_cpu.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#include "diff_cpu.h"
#include "../../../utils.h"
#include <algorithm>
#include <cmath>

namespace op::diff::cpu {

// Validates the arguments of a diff operation and packs them into a DiffInfo.
//
// Returns INFINI_STATUS_BAD_PARAM when n is not positive or dim (after
// wrapping a negative value by the input rank) is outside [0, rank), and
// INFINI_STATUS_BAD_TENSOR_SHAPE when the input has n or fewer entries along
// the axis or y's shape is not x's shape with that axis shortened by n.
utils::Result<DiffInfo> DiffInfo::create(
    infiniopTensorDescriptor_t x_desc,
    infiniopTensorDescriptor_t y_desc,
    int dim,
    int n) {

    // The difference order must be strictly positive.
    if (!(n > 0)) {
        return INFINI_STATUS_BAD_PARAM;
    }

    const auto &in_shape = x_desc->shape();
    const auto &out_shape = y_desc->shape();
    const size_t rank = x_desc->ndim();
    const int signed_rank = static_cast<int>(rank);

    // A negative axis counts back from the last dimension.
    const int axis = (dim < 0) ? dim + signed_rank : dim;
    const bool axis_in_range = (axis >= 0) && (axis < signed_rank);
    if (!axis_in_range) {
        return INFINI_STATUS_BAD_PARAM;
    }

    // Each pass removes one entry along the axis; at least one must remain.
    const size_t order = static_cast<size_t>(n);
    if (!(in_shape[axis] > order)) {
        return INFINI_STATUS_BAD_TENSOR_SHAPE;
    }

    // The output must match the input everywhere except along the axis.
    std::vector<size_t> required_shape = in_shape;
    required_shape[axis] = in_shape[axis] - order;
    if (!(out_shape == required_shape)) {
        return INFINI_STATUS_BAD_TENSOR_SHAPE;
    }

    DiffInfo info;
    info.ndim = rank;
    info.dim = axis;
    info.n = n;
    info.input_shape = in_shape;
    info.output_shape = out_shape;
    info.input_strides = x_desc->strides();
    info.output_strides = y_desc->strides();
    info.input_size = x_desc->numel();
    info.output_size = y_desc->numel();

    return utils::Result<DiffInfo>(std::move(info));
}

// Defaulted out of line; the descriptor's own members (_dtype and _info) need
// no manual cleanup.
Descriptor::~Descriptor() = default;

// Builds a CPU diff descriptor for the given tensor descriptors.
//
//   handle   - supplies the device type and device id stored in the descriptor
//   desc_ptr - receives the newly allocated descriptor on success
//   y_desc   - output tensor descriptor
//   x_desc   - input tensor descriptor
//   dim, n   - axis and order of the difference, validated by DiffInfo::create
//
// Returns INFINI_STATUS_SUCCESS on success. Returns
// INFINI_STATUS_BAD_TENSOR_DTYPE when y's dtype differs from x's; an
// unsupported x dtype is rejected by CHECK_DTYPE and invalid dim/n/shape
// combinations by CHECK_RESULT on the DiffInfo::create result.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    int dim,
    int n) {

    auto dtype = x_desc->dtype();
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);

    // calculate() reinterprets both buffers with x's element type, so an
    // output descriptor of a different dtype must be rejected here.
    if (y_desc->dtype() != dtype) {
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }

    auto info_result = DiffInfo::create(x_desc, y_desc, dim, n);
    CHECK_RESULT(info_result);

    *desc_ptr = new Descriptor(dtype, info_result.take(), handle->device, handle->device_id);
    return INFINI_STATUS_SUCCESS;
}

// Computes the n-th order forward difference of x along info.dim into y.
//
// One pass maps a run of length L along the axis to L - 1 values
// out[i] = in[i + 1] - in[i]; the pass is applied info.n times.
//
// Preconditions (established by DiffInfo::create): info.n >= 1 and
// info.input_shape[info.dim] > info.n.
//
// Both x and y are indexed as densely packed buffers with the last dimension
// varying fastest; info.input_strides and info.output_strides are not read.
// NOTE(review): a non-contiguous tensor would therefore be read or written
// incorrectly — confirm callers only pass contiguous data or reject other
// layouts when the descriptor is created.
template <typename T>
void diff_impl(
    const DiffInfo &info,
    T *y,
    const T *x) {

    const size_t axis = static_cast<size_t>(info.dim);
    const size_t dim_size = info.input_shape[axis];

    // Collapse the tensor to (size_before, dim_size, size_after).
    size_t size_before = 1;
    for (size_t i = 0; i < axis; ++i) {
        size_before *= info.input_shape[i];
    }
    size_t size_after = 1;
    for (size_t i = axis + 1; i < info.ndim; ++i) {
        size_after *= info.input_shape[i];
    }

    // Ping-pong buffers. The destination is sized for the first pass, which
    // produces the largest result; sizing it to info.output_size would be too
    // small for every pass but the last whenever n > 1.
    std::vector<T> source(info.input_size);
    std::vector<T> result(size_before * (dim_size - 1) * size_after);
    std::copy_n(x, info.input_size, source.begin());

    for (int order = 0; order < info.n; ++order) {
        const size_t current_dim_size = dim_size - static_cast<size_t>(order);
        const size_t current_output_size = current_dim_size - 1;
        const T *in = source.data();
        T *out = result.data();

#pragma omp parallel for collapse(2)
        for (ptrdiff_t b = 0; b < static_cast<ptrdiff_t>(size_before); ++b) {
            for (ptrdiff_t a = 0; a < static_cast<ptrdiff_t>(size_after); ++a) {
                const size_t in_base = b * current_dim_size * size_after + a;
                const size_t out_base = b * current_output_size * size_after + a;
                for (size_t i = 0; i < current_output_size; ++i) {
                    out[out_base + i * size_after] = in[in_base + (i + 1) * size_after] - in[in_base + i * size_after];
                }
            }
        }

        // This pass's result is the next pass's input.
        std::swap(source, result);
    }

    // After the final swap `source` holds the packed n-th order result.
    std::copy_n(source.data(), info.output_size, y);
}

// Runs the diff for the element type recorded in the descriptor.
//
// y and x are untyped buffers interpreted with the descriptor's dtype.
// workspace, workspace_size and stream are not used by this backend.
// Returns INFINI_STATUS_BAD_TENSOR_DTYPE for a dtype outside
// F16/BF16/F32/F64, otherwise INFINI_STATUS_SUCCESS.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream) const {

    (void)workspace;
    (void)workspace_size;
    (void)stream;

    switch (_dtype) {
    case INFINI_DTYPE_F16:
        diff_impl<fp16_t>(_info, static_cast<fp16_t *>(y), static_cast<const fp16_t *>(x));
        return INFINI_STATUS_SUCCESS;
    case INFINI_DTYPE_BF16:
        diff_impl<bf16_t>(_info, static_cast<bf16_t *>(y), static_cast<const bf16_t *>(x));
        return INFINI_STATUS_SUCCESS;
    case INFINI_DTYPE_F32:
        diff_impl<float>(_info, static_cast<float *>(y), static_cast<const float *>(x));
        return INFINI_STATUS_SUCCESS;
    case INFINI_DTYPE_F64:
        diff_impl<double>(_info, static_cast<double *>(y), static_cast<const double *>(x));
        return INFINI_STATUS_SUCCESS;
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
}

} // namespace op::diff::cpu
61 changes: 61 additions & 0 deletions src/infiniop/ops/diff/cpu/diff_cpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#ifndef __DIFF_CPU_H__
#define __DIFF_CPU_H__

#include "../../../operator.h"
#include "../../../devices/cpu/common_cpu.h"
#include <vector>

namespace op::diff::cpu {

// Validated parameters of one diff operation, filled in by DiffInfo::create.
struct DiffInfo {
// Number of dimensions of the input tensor.
size_t ndim;
// Axis to difference along; create() stores it wrapped into [0, ndim).
int dim;
// Order of the difference; create() rejects values <= 0.
int n;
// Shape of the input tensor.
std::vector<size_t> input_shape;
// Shape of the output tensor: the input shape with shape[dim] reduced by n.
std::vector<size_t> output_shape;
// Strides copied from the input and output tensor descriptors.
// NOTE(review): presumably counted in elements — confirm; the CPU kernel
// does not read them.
std::vector<ptrdiff_t> input_strides;
std::vector<ptrdiff_t> output_strides;
// Element counts reported by the input and output descriptors' numel().
size_t input_size;
size_t output_size;

// Checks dim, n and the two shapes against each other and returns either a
// populated DiffInfo or an error status.
static utils::Result<DiffInfo> create(
infiniopTensorDescriptor_t x_desc,
infiniopTensorDescriptor_t y_desc,
int dim,
int n);
};

// CPU descriptor for the diff operator: the element dtype plus the validated
// DiffInfo, on top of the device type and id held by InfiniopDescriptor.
class Descriptor final : public InfiniopDescriptor {
// Element type of the tensors; selects the diff_impl instantiation in calculate().
infiniDtype_t _dtype;
// Validated axis, order, shapes and sizes of the operation.
DiffInfo _info;

// Private: descriptors are only constructed through create().
Descriptor(infiniDtype_t dtype, DiffInfo info,
infiniDevice_t device_type, int device_id)
: InfiniopDescriptor{device_type, device_id},
_dtype(dtype),
_info(std::move(info)) {}

public:
~Descriptor();

// Validates the arguments and, on success, stores a newly allocated
// descriptor in *desc_ptr.
static infiniStatus_t create(
infiniopHandle_t handle,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t y_desc,
infiniopTensorDescriptor_t x_desc,
int dim,
int n);

// This backend requests no caller-provided workspace.
size_t workspaceSize() const { return 0; }

// Computes the diff of x into y for the stored dtype; workspace,
// workspace_size and stream are not used by the implementation.
infiniStatus_t calculate(
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream) const;
};

} // namespace op::diff::cpu

#endif // __DIFF_CPU_H__
Loading