Skip to content
This repository was archived by the owner on Feb 18, 2020. It is now read-only.

Commit d1cbedd

Browse files
author
C1312543 Henrique Saviatto Borba
committed
CArray now preloads the OpenCL GPU context during PHP MINIT (module initialization).
1 parent eb916c5 commit d1cbedd

File tree

6 files changed

+128
-32
lines changed

6 files changed

+128
-32
lines changed

config.m4

+1
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ PHP_NEW_EXTENSION(carray,
157157
kernel/convert_datatype.c \
158158
kernel/dtype_transfer.c \
159159
kernel/assign_scalar.c \
160+
kernel/gpu.c \
160161
kernel/common/exceptions.c \
161162
kernel/item_selection.c \
162163
kernel/clip.c \

kernel/common/clblas_funcs.c

+58-26
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include "clblas_funcs.h"
1212
#include "clBLAS.h"
13+
#include "../gpu.h"
1314

1415
static MatrixShape
1516
_select_matrix_shape(CArray *array)
@@ -58,29 +59,22 @@ _bad_strides(CArray * ap)
5859
return 0;
5960
}
6061

61-
/*
62-
* Helper: dispatch to appropriate cblas_?gemm for typenum.
63-
*/
6462
static void
65-
clgemm(int typenum, clblasOrder order,
66-
clblasTranspose transA, clblasTranspose transB,
67-
int m, int n, int k,
68-
CArray *A, int lda, CArray *B, int ldb, CArray *R)
69-
{
70-
int i ;
71-
const void *Adata = CArray_DATA(A), *Bdata = CArray_DATA(B);
72-
void *Rdata = CArray_DATA(R);
73-
int ldc = CArray_DIM(R, 1) > 1 ? CArray_DIM(R, 1) : 1;
63+
cldaxpy(int n_elements, int alpha, double *a, int incX, double *b, int incY) {
64+
cl_double alphad = alpha;
65+
size_t offsetX = 0, offsetY = 0;
7466

7567
cl_int err;
7668
cl_platform_id platform = 0;
7769
cl_device_id device = 0;
7870
cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
7971
cl_context ctx = 0;
8072
cl_command_queue queue = 0;
81-
cl_mem bufA, bufB, bufC;
73+
cl_mem bufA, bufB;
8274
cl_event event = NULL;
83-
int ret = 0;
75+
76+
ctx = getCLContext();
77+
queue = getCLQueue();
8478

8579
/* Setup OpenCL environment. */
8680
err = clGetPlatformIDs( 1, &platform, NULL );
@@ -93,6 +87,50 @@ clgemm(int typenum, clblasOrder order,
9387
/* Setup clBLAS */
9488
err = clblasSetup( );
9589

90+
/* Prepare OpenCL memory objects and place matrices inside them. */
91+
bufA = clCreateBuffer( ctx, CL_MEM_READ_ONLY, n_elements * sizeof(double),
92+
NULL, &err );
93+
94+
php_printf("%d", err);
95+
96+
bufB = clCreateBuffer( ctx, CL_MEM_READ_ONLY, sizeof(double),
97+
NULL, &err );
98+
99+
100+
101+
err = clEnqueueWriteBuffer( queue, bufA, CL_TRUE, 0,
102+
n_elements * sizeof(double), a, 0, NULL, NULL );
103+
err = clEnqueueWriteBuffer( queue, bufB, CL_TRUE, 0,
104+
sizeof(double), b, 0, NULL, NULL );
105+
106+
err = clblasDaxpy((size_t)n_elements, alphad, bufA, 0, incX, bufB, 0, incY, 1, &queue, 0, NULL, &event);
107+
108+
return NULL;
109+
}
110+
111+
/*
112+
* Helper: dispatch to the appropriate clBLAS ?gemm routine for typenum.
113+
*/
114+
static void
115+
clgemm(int typenum, clblasOrder order,
116+
clblasTranspose transA, clblasTranspose transB,
117+
int m, int n, int k,
118+
CArray *A, int lda, CArray *B, int ldb, CArray *R)
119+
{
120+
int i ;
121+
const void *Adata = CArray_DATA(A), *Bdata = CArray_DATA(B);
122+
void *Rdata = CArray_DATA(R);
123+
int ldc = CArray_DIM(R, 1) > 1 ? CArray_DIM(R, 1) : 1;
124+
125+
cl_int err;
126+
cl_context ctx = 0;
127+
cl_command_queue queue = 0;
128+
cl_mem bufA, bufB, bufC;
129+
cl_event event = NULL;
130+
int ret = 0;
131+
132+
ctx = getCLContext();
133+
queue = getCLQueue();
96134

97135
/* Prepare OpenCL memory objects and place matrices inside them. */
98136
bufA = clCreateBuffer( ctx, CL_MEM_READ_ONLY, m * k * CArray_DESCR(A)->elsize,
@@ -113,6 +151,7 @@ clgemm(int typenum, clblasOrder order,
113151
cl_float alpha = 1;
114152
cl_float beta = 0;
115153

154+
116155
switch (typenum) {
117156
case TYPE_DOUBLE_INT:
118157
/* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
@@ -135,6 +174,7 @@ clgemm(int typenum, clblasOrder order,
135174
m * n * CArray_DESCR(R)->elsize,
136175
Rdata, 0, NULL, NULL );
137176

177+
138178
/* Release OpenCL memory objects. */
139179
clReleaseMemObject( bufC );
140180
clReleaseMemObject( bufB );
@@ -147,7 +187,6 @@ clgemm(int typenum, clblasOrder order,
147187
clReleaseCommandQueue( queue );
148188
clReleaseContext( ctx );
149189

150-
151190
}
152191

153192
CArray *
@@ -272,7 +311,7 @@ clblas_matrixproduct(int typenum, CArray * ap1, CArray *ap2, CArray *out, Memory
272311
}
273312
else {
274313
/*
275-
* (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2)
314+
* (CArray_NDIM(ap1) <= 2 && CArray_NDIM(ap2) <= 2)
276315
* Both ap1 and ap2 are vectors or matrices
277316
*/
278317
l = CArray_DIM(ap1, CArray_NDIM(ap1) - 1);
@@ -321,13 +360,9 @@ clblas_matrixproduct(int typenum, CArray * ap1, CArray *ap2, CArray *out, Memory
321360
*((double *)CArray_DATA(ap1));
322361
}
323362
else if (ap1shape != _matrix) {
324-
throw_notimplemented_exception();
363+
throw_not_implemented_exception();
325364
return NULL;
326-
/**cblas_daxpy(l,
327-
*((double *)PyArray_DATA(ap2)),
328-
(double *)PyArray_DATA(ap1),
329-
ap1stride/sizeof(double),
330-
(double *)PyArray_DATA(out_buf), 1);*/
365+
//cldaxpy(ap1, ap2, out_buf);
331366
}
332367
else {
333368
int maxind, oind, i, a1s, outs;
@@ -343,12 +378,9 @@ clblas_matrixproduct(int typenum, CArray * ap1, CArray *ap2, CArray *out, Memory
343378
a1s = CArray_STRIDE(ap1, maxind) / sizeof(double);
344379
outs = CArray_STRIDE(out_buf, maxind) / sizeof(double);
345380
for (i = 0; i < CArray_DIM(ap1, oind); i++) {
346-
//cblas_daxpy(l, val, (double *)ptr, a1s,
347-
//(double *)optr, outs);
381+
cldaxpy(l, val, (double *)ptr, a1s, (double *)optr, outs);
348382
ptr += CArray_STRIDE(ap1, oind);
349383
optr += CArray_STRIDE(out_buf, oind);
350-
throw_notimplemented_exception();
351-
return NULL;
352384
}
353385
}
354386
}

kernel/gpu.c

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#include "config.h"
2+
3+
#ifdef HAVE_CLBLAS
4+
#include "gpu.h"
5+
#include "clBLAS.h"
6+
7+
/* OpenCL context shared by the extension: assigned in start_clblas_context()
 * and handed out by getCLContext(). */
cl_context ctx;
8+
/* OpenCL command queue shared by the extension: assigned in
 * start_clblas_context() and handed out by getCLQueue(). */
cl_command_queue queue;
9+
10+
void
11+
start_clblas_context() {
12+
cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
13+
cl_platform_id platform = 0;
14+
cl_device_id device = 0;
15+
cl_int err;
16+
17+
18+
/* Setup OpenCL environment. */
19+
err = clGetPlatformIDs( 1, &platform, NULL );
20+
err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL );
21+
22+
props[1] = (cl_context_properties)platform;
23+
24+
ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
25+
queue = clCreateCommandQueue( ctx, device, 0, &err );
26+
27+
/* Setup clBLAS */
28+
err = clblasSetup( );
29+
}
30+
31+
32+
cl_context
33+
getCLContext() {
34+
return ctx;
35+
}
36+
37+
cl_command_queue
38+
getCLQueue() {
39+
return queue;
40+
}
41+
42+
#endif

kernel/gpu.h

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#ifndef PHPSCI_EXT_GPU_H
2+
#define PHPSCI_EXT_GPU_H
3+
4+
#include "config.h"
5+
6+
#ifdef HAVE_CLBLAS
7+
#include "clBLAS.h"
8+
9+
void start_clblas_context();
10+
cl_command_queue getCLQueue();
11+
cl_context getCLContext();
12+
#endif
13+
14+
15+
#endif //PHPSCI_EXT_GPU_H

kernel/linalg.c

+5-6
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,9 @@ FLOAT_dot(char *ip1, int is1, char *ip2, int is2, char *op, int n)
9898
void
9999
DOUBLE_dot(char *ip1, int is1, char *ip2, int is2, char *op, int n)
100100
{
101+
#ifdef HAVE_CBLAS
101102
int is1b = blas_stride(is1, sizeof(double));
102103
int is2b = blas_stride(is2, sizeof(double));
103-
104-
#ifdef HAVE_CBLAS
105104
if (is1b && is2b)
106105
{
107106
double sum = 0.;
@@ -160,27 +159,27 @@ CArray_Matmul(CArray * ap1, CArray * ap2, CArray * out, MemoryPointer * ptr)
160159
CArrayIterator * it1, * it2;
161160
char * op;
162161

163-
if (CArray_NDIM(ap1) == 0 || CArray_NDIM(ap2) == 0) {
162+
/**if (CArray_NDIM(ap1) == 0 || CArray_NDIM(ap2) == 0) {
164163
throw_valueerror_exception("Scalar operands are not allowed, use '*' instead");
165164
return NULL;
166-
}
165+
}**/
167166
typenum = CArray_ObjectType(ap1, 0);
168167
typenum = CArray_ObjectType(ap2, typenum);
169168

170169
nd1 = CArray_NDIM(ap1);
171170
nd2 = CArray_NDIM(ap2);
172171

173172
#ifdef HAVE_BLAS
173+
#ifndef HAVE_CLBLAS
174174
if (nd1 <= 2 && nd2 <= 2 && (TYPE_DOUBLE_INT == typenum || TYPE_FLOAT_INT == typenum)) {
175175
return cblas_matrixproduct(typenum, ap1, ap2, out, ptr);
176176
}
177177
#endif
178+
#endif
178179

179180
#ifdef HAVE_CLBLAS
180181
if (nd1 <= 2 && nd2 <= 2 && (TYPE_DOUBLE_INT == typenum || TYPE_FLOAT_INT == typenum)) {
181182
return clblas_matrixproduct(typenum, ap1, ap2, out, ptr);
182-
php_printf("FOI");
183-
return NULL;
184183
}
185184
#endif
186185

phpsci.c

+7
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@
5959
#include "kernel/storage.h"
6060
#include "kernel/round.h"
6161

62+
#ifdef HAVE_CLBLAS
63+
#include "kernel/gpu.h"
64+
#endif
65+
6266
typedef struct _zend_carray_cdata {
6367
zend_object std;
6468
} end_carray_cdata;
@@ -2948,6 +2952,9 @@ static PHP_MINIT_FUNCTION(carray)
29482952
carray_object_handlers.compare_objects = carray_compare;
29492953
carray_object_handlers.count_elements = carray_count;
29502954

2955+
#ifdef HAVE_CLBLAS
2956+
start_clblas_context();
2957+
#endif
29512958

29522959
zend_class_implements(carray_sc_entry, 1, zend_ce_arrayaccess);
29532960

0 commit comments

Comments
 (0)