Include `CustomCudaMem.h` in your CUDA source file.
Example use case:
#include "CustomCudaMem.h"
//----kernel or functions here---------
// Example driver: allocates three n x n int matrices on the host with malloc
// and three same-sized buffers through CustomCudaMemory::allocate, then
// releases everything. Returns 0 on success, 1 if a host allocation fails.
int main()
{
    // Matrix dimension (each matrix holds n x n ints); change to whatever size you want.
    int n = 1 << 10;

    // Size in bytes for memory allocation. Widen to size_t BEFORE multiplying:
    // `n * n` evaluated in int overflows (for a 32-bit int) once n exceeds 46340,
    // which would yield a wrong allocation size.
    size_t bytes = static_cast<size_t>(n) * static_cast<size_t>(n) * sizeof(int);

    // Allocate the host matrices.
    int *h_a = (int*)malloc(bytes);
    int *h_b = (int*)malloc(bytes);
    int *h_c = (int*)malloc(bytes);

    // malloc returns a null pointer on failure; bail out instead of
    // dereferencing it later. free() on a null pointer is a no-op, so the
    // cleanup below is safe whichever allocation failed.
    if (!h_a || !h_b || !h_c) {
        free(h_a);
        free(h_b);
        free(h_c);
        return 1;
    }

    // Custom CUDA memory allocation (presumably device buffers -- confirm
    // against CustomCudaMem.h).
    // NOTE(review): how allocate() reports failure is not visible here; add a
    // check matching its contract (null return, exception, ...) once confirmed.
    int *d_a = (int*)CustomCudaMemory::allocate(bytes);
    int *d_b = (int*)CustomCudaMemory::allocate(bytes);
    int *d_c = (int*)CustomCudaMemory::allocate(bytes);

    //----------------Your other code here --------

    // Release the buffers obtained from CustomCudaMemory::allocate.
    CustomCudaMemory::free(d_a);
    CustomCudaMemory::free(d_b);
    CustomCudaMemory::free(d_c);

    // Free host memory
    free(h_a);
    free(h_b);
    free(h_c);

    return 0;
}