Skip to content

Commit 221abf1

Browse files
committed
using of constant memory and pinned memory
1 parent 334bda1 commit 221abf1

File tree

5 files changed

+145
-29
lines changed

5 files changed

+145
-29
lines changed

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ project(Image_Kernel_Processing_CUDA CUDA)
33

44
set(CMAKE_CUDA_STANDARD 14)
55

6-
add_executable(Image_Kernel_Processing_CUDA main.cu Image.h Image.cu Utils.h Utils.cu stb_image.h stb_image_write.h Kernel.cu Kernel.h Processing.cu Processing.h CUDA_check.h)
6+
add_executable(Image_Kernel_Processing_CUDA main.cu Image.h Image.cu Utils.h Utils.cu stb_image.h stb_image_write.h Kernel.cu Kernel.h Processing.cu Processing.h CUDA_check.h Parameters.h)
77

88
set_target_properties(Image_Kernel_Processing_CUDA PROPERTIES
99
CUDA_SEPARABLE_COMPILATION ON)

Image.cu

+22
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
#include "Image.h"
66
#include <stdlib.h>
7+
#include "Parameters.h"
8+
#include "CUDA_check.h"
9+
10+
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__,__LINE__, #value, value)
711

812
Image *Image_new(int width, int height, int channels, unsigned char *data) {
913
Image *img = (Image *) malloc(sizeof(Image));
@@ -13,19 +17,37 @@ Image *Image_new(int width, int height, int channels, unsigned char *data) {
1317
Image_setChannels(img, channels);
1418
Image_setPitch(img, width * channels);
1519

20+
#ifdef PINNED_MEMORY
21+
unsigned char *dataPinned;
22+
CUDA_CHECK_RETURN(cudaMallocHost((void**)&dataPinned, sizeof(unsigned char) * width * height * channels));
23+
memcpy(dataPinned, data, sizeof(unsigned char) * width * height * channels);
24+
Image_setData(img, dataPinned);
25+
#else
1626
Image_setData(img, data);
27+
#endif
28+
1729
return img;
1830
}
1931

2032
Image *Image_new_empty(int width, int height, int channels) {
33+
#ifdef PINNED_MEMORY
34+
unsigned char *data;
35+
CUDA_CHECK_RETURN(cudaMallocHost((void**)&data, sizeof(unsigned char) * width * height * channels));
36+
#else
2137
unsigned char *data = (unsigned char *) malloc(sizeof(unsigned char) * width * height * channels);
38+
#endif
39+
2240
return Image_new(width, height, channels, data);
2341
}
2442

2543
void Image_delete(Image *img) {
2644
if (img != NULL) {
2745
if (Image_getData(img) != NULL) {
46+
#ifdef PINNED_MEMORY
47+
CUDA_CHECK_RETURN(cudaFreeHost(Image_getData(img)));
48+
#else
2849
free(Image_getData(img));
50+
#endif
2951
}
3052
free(img);
3153
}

Parameters.h

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//
2+
// Created by kevin on 03/11/21.
3+
//
4+
5+
#ifndef IMAGE_KERNEL_PROCESSING_CUDA_PARAMETERS_H
6+
#define IMAGE_KERNEL_PROCESSING_CUDA_PARAMETERS_H
7+
8+
#define CONSTANT_MEMORY
9+
//#define SHARED_MEMORY
10+
//#define PINNED_MEMORY
11+
12+
/**
13+
* Min value of kernel dimension to test (MUST be odd)
14+
*/
15+
const int KERNEL_DIM_MIN = 19;
16+
/**
17+
* Max value of kernel dimension to test (MUST be odd)
18+
*/
19+
const int KERNEL_DIM_MAX = 19;
20+
/**
21+
* Step on values of kernel dimension (MUST be even)
22+
*/
23+
const int KERNEL_DIM_STEP = 6;
24+
/**
25+
* Image dimension to test: 4K, 5K, 6K, 7K or 8K
26+
*/
27+
const char IMAGE_DIMENSION[] = "5K";
28+
/**
29+
* Number of image of each dimension to test (max 3)
30+
*/
31+
const int IMAGE_QUANTITY = 3;
32+
/**
33+
* Number of times to test each image
34+
*/
35+
const int REPETITIONS = 1;
36+
37+
#endif //IMAGE_KERNEL_PROCESSING_CUDA_PARAMETERS_H

Processing.cu

+84-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44

55
#include "Processing.h"
66
#include "CUDA_check.h"
7+
#include "Parameters.h"
78

89
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__,__LINE__, #value, value)
10+
#define BLOCK_SIZE 32
911

12+
#if !defined CONSTANT_MEMORY && !defined SHARED_MEMORY
1013
__global__ void
1114
kernel(unsigned char *input, unsigned long long int *krn, unsigned char *output, int height, int width, int channels,
1215
int size, double weight) {
@@ -66,8 +69,8 @@ Image *process(Image *img, Kernel *krn) {
6669
sizeof(unsigned long long int) * krn->size * krn->size,
6770
cudaMemcpyHostToDevice));
6871

69-
dim3 blockDim(32, 32);
70-
dim3 gridDim(ceil(((float) img->width) / blockDim.x), ceil(((float) img->height) / blockDim.y));
72+
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE);
73+
dim3 gridDim(ceil(((float) img->width) / BLOCK_SIZE), ceil(((float) img->height) / BLOCK_SIZE));
7174

7275
kernel<<<gridDim, blockDim>>>(d_input, d_krn, d_output, img->height, img->width, img->channels, krn->size,
7376
krn->weight);
@@ -82,4 +85,82 @@ Image *process(Image *img, Kernel *krn) {
8285
cudaFree(d_output);
8386

8487
return res;
85-
}
88+
}
89+
#endif
90+
91+
#if defined CONSTANT_MEMORY && !defined SHARED_MEMORY
92+
__constant__ unsigned long long int KERNEL[25 * 25];
93+
94+
__global__ void
95+
kernelConstant(unsigned char *input, unsigned char *output, int height, int width, int channels,
96+
int size, double weight) {
97+
int iy = blockIdx.y * blockDim.y + threadIdx.y;
98+
int ix = blockIdx.x * blockDim.x + threadIdx.x;
99+
100+
if (iy < height && ix < width) {
101+
int kCenter = size / 2;
102+
int dx, dy, px, py;
103+
104+
for (int ic = 0; ic < channels; ic++) {
105+
// vars "i?" identify image's element
106+
unsigned long long int newVal = 0;
107+
for (int ky = 0; ky < size; ky++) {
108+
for (int kx = 0; kx < size; kx++) {
109+
// vars "k?" identify kernel's element
110+
dx = kx - kCenter;
111+
dy = ky - kCenter;
112+
// vars "d?" identify kernel's element's position with respect to the center
113+
px = ix + dx;
114+
py = iy + dy;
115+
// vars "p?" identify the pixel to combine with kernel's element
116+
117+
if (px < 0 || px >= width) { // edge handling: extend
118+
px = (px < 0) ? 0 : (width - 1);
119+
}
120+
if (py < 0 || py >= height) {
121+
py = (py < 0) ? 0 : (height - 1);
122+
}
123+
124+
newVal += (unsigned long long int) input[py * width * channels + px * channels + ic] *
125+
KERNEL[ky * size + kx];
126+
}
127+
}
128+
newVal = (unsigned long long int) ((long double) newVal * weight);
129+
output[iy * width * channels + ix * channels + ic] = (unsigned char) newVal;
130+
}
131+
}
132+
}
133+
134+
Image *process(Image *img, Kernel *krn) {
135+
Image *res = Image_new_empty(img->width, img->height, img->channels);
136+
137+
unsigned char *d_input;
138+
unsigned char *d_output;
139+
140+
CUDA_CHECK_RETURN(cudaMalloc((void **) &d_input, sizeof(unsigned char) * img->width * img->height * img->channels));
141+
CUDA_CHECK_RETURN(
142+
cudaMalloc((void **) &d_output, sizeof(unsigned char) * img->width * img->height * img->channels));
143+
144+
CUDA_CHECK_RETURN(cudaMemcpy((void *) d_input, (void *) img->data,
145+
sizeof(unsigned char) * img->width * img->height * img->channels,
146+
cudaMemcpyHostToDevice));
147+
CUDA_CHECK_RETURN(cudaMemcpyToSymbol(KERNEL, (void *) krn->coefficients,
148+
sizeof(unsigned long long int) * krn->size * krn->size));
149+
150+
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE);
151+
dim3 gridDim(ceil(((float) img->width) / BLOCK_SIZE), ceil(((float) img->height) / BLOCK_SIZE));
152+
153+
kernelConstant<<<gridDim, blockDim>>>(d_input, d_output, img->height, img->width, img->channels, krn->size,
154+
krn->weight);
155+
cudaDeviceSynchronize();
156+
157+
CUDA_CHECK_RETURN(cudaMemcpy((void *) res->data, (void *) d_output,
158+
sizeof(unsigned char) * img->width * img->height * img->channels,
159+
cudaMemcpyDeviceToHost));
160+
161+
cudaFree(d_input);
162+
cudaFree(d_output);
163+
164+
return res;
165+
}
166+
#endif

main.cu

+1-25
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,7 @@
66
#include "Utils.h"
77
#include "Kernel.h"
88
#include "Processing.h"
9-
10-
/**
11-
* Min value of kernel dimension to test (MUST be odd)
12-
*/
13-
const int KERNEL_DIM_MIN = 25;
14-
/**
15-
* Max value of kernel dimension to test (MUST be odd)
16-
*/
17-
const int KERNEL_DIM_MAX = 25;
18-
/**
19-
* Step on values of kernel dimension (MUST be even)
20-
*/
21-
const int KERNEL_DIM_STEP = 6;
22-
/**
23-
* Image dimension to test: 4K, 5K, 6K, 7K or 8K
24-
*/
25-
const char IMAGE_DIMENSION[] = "4K";
26-
/**
27-
* Number of image of each dimension to test (max 3)
28-
*/
29-
const int IMAGE_QUANTITY = 3;
30-
/**
31-
* Number of times to test each image
32-
*/
33-
const int REPETITIONS = 1;
9+
#include "Parameters.h"
3410

3511
void saveTextFile(int *kDim, double *times, char *filename) {
3612
FILE *file = fopen(filename, "w");

0 commit comments

Comments
 (0)