10
10
11
11
#include "clblas_funcs.h"
12
12
#include "clBLAS.h"
13
+ #include "../gpu.h"
13
14
14
15
static MatrixShape
15
16
_select_matrix_shape (CArray * array )
@@ -58,29 +59,22 @@ _bad_strides(CArray * ap)
58
59
return 0 ;
59
60
}
60
61
61
- /*
62
- * Helper: dispatch to appropriate cblas_?gemm for typenum.
63
- */
64
62
static void
65
- clgemm (int typenum , clblasOrder order ,
66
- clblasTranspose transA , clblasTranspose transB ,
67
- int m , int n , int k ,
68
- CArray * A , int lda , CArray * B , int ldb , CArray * R )
69
- {
70
- int i ;
71
- const void * Adata = CArray_DATA (A ), * Bdata = CArray_DATA (B );
72
- void * Rdata = CArray_DATA (R );
73
- int ldc = CArray_DIM (R , 1 ) > 1 ? CArray_DIM (R , 1 ) : 1 ;
63
+ cldaxpy (int n_elements , int alpha , double * a , int incX , double * b , int incY ) {
64
+ cl_double alphad = alpha ;
65
+ size_t offsetX = 0 , offsetY = 0 ;
74
66
75
67
cl_int err ;
76
68
cl_platform_id platform = 0 ;
77
69
cl_device_id device = 0 ;
78
70
cl_context_properties props [3 ] = { CL_CONTEXT_PLATFORM , 0 , 0 };
79
71
cl_context ctx = 0 ;
80
72
cl_command_queue queue = 0 ;
81
- cl_mem bufA , bufB , bufC ;
73
+ cl_mem bufA , bufB ;
82
74
cl_event event = NULL ;
83
- int ret = 0 ;
75
+
76
+ ctx = getCLContext ();
77
+ queue = getCLQueue ();
84
78
85
79
/* Setup OpenCL environment. */
86
80
err = clGetPlatformIDs ( 1 , & platform , NULL );
@@ -93,6 +87,50 @@ clgemm(int typenum, clblasOrder order,
93
87
/* Setup clBLAS */
94
88
err = clblasSetup ( );
95
89
90
+ /* Prepare OpenCL memory objects and place the input vectors inside them. */
91
+ bufA = clCreateBuffer ( ctx , CL_MEM_READ_ONLY , n_elements * sizeof (double ),
92
+ NULL , & err );
93
+
94
+ php_printf ("%d" , err );
95
+
96
+ bufB = clCreateBuffer ( ctx , CL_MEM_READ_ONLY , sizeof (double ),
97
+ NULL , & err );
98
+
99
+
100
+
101
+ err = clEnqueueWriteBuffer ( queue , bufA , CL_TRUE , 0 ,
102
+ n_elements * sizeof (double ), a , 0 , NULL , NULL );
103
+ err = clEnqueueWriteBuffer ( queue , bufB , CL_TRUE , 0 ,
104
+ sizeof (double ), b , 0 , NULL , NULL );
105
+
106
+ err = clblasDaxpy ((size_t )n_elements , alphad , bufA , 0 , incX , bufB , 0 , incY , 1 , & queue , 0 , NULL , & event );
107
+
108
+ return NULL ;
109
+ }
110
+
111
+ /*
112
+ * Helper: dispatch to the appropriate clBLAS gemm routine for typenum.
113
+ */
114
+ static void
115
+ clgemm (int typenum , clblasOrder order ,
116
+ clblasTranspose transA , clblasTranspose transB ,
117
+ int m , int n , int k ,
118
+ CArray * A , int lda , CArray * B , int ldb , CArray * R )
119
+ {
120
+ int i ;
121
+ const void * Adata = CArray_DATA (A ), * Bdata = CArray_DATA (B );
122
+ void * Rdata = CArray_DATA (R );
123
+ int ldc = CArray_DIM (R , 1 ) > 1 ? CArray_DIM (R , 1 ) : 1 ;
124
+
125
+ cl_int err ;
126
+ cl_context ctx = 0 ;
127
+ cl_command_queue queue = 0 ;
128
+ cl_mem bufA , bufB , bufC ;
129
+ cl_event event = NULL ;
130
+ int ret = 0 ;
131
+
132
+ ctx = getCLContext ();
133
+ queue = getCLQueue ();
96
134
97
135
/* Prepare OpenCL memory objects and place matrices inside them. */
98
136
bufA = clCreateBuffer ( ctx , CL_MEM_READ_ONLY , m * k * CArray_DESCR (A )-> elsize ,
@@ -113,6 +151,7 @@ clgemm(int typenum, clblasOrder order,
113
151
cl_float alpha = 1 ;
114
152
cl_float beta = 0 ;
115
153
154
+
116
155
switch (typenum ) {
117
156
case TYPE_DOUBLE_INT :
118
157
/* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
@@ -135,6 +174,7 @@ clgemm(int typenum, clblasOrder order,
135
174
m * n * CArray_DESCR (R )-> elsize ,
136
175
Rdata , 0 , NULL , NULL );
137
176
177
+
138
178
/* Release OpenCL memory objects. */
139
179
clReleaseMemObject ( bufC );
140
180
clReleaseMemObject ( bufB );
@@ -147,7 +187,6 @@ clgemm(int typenum, clblasOrder order,
147
187
clReleaseCommandQueue ( queue );
148
188
clReleaseContext ( ctx );
149
189
150
-
151
190
}
152
191
153
192
CArray *
@@ -272,7 +311,7 @@ clblas_matrixproduct(int typenum, CArray * ap1, CArray *ap2, CArray *out, Memory
272
311
}
273
312
else {
274
313
/*
275
- * (PyArray_NDIM (ap1) <= 2 && PyArray_NDIM (ap2) <= 2)
314
+ * (CArray_NDIM (ap1) <= 2 && CArray_NDIM (ap2) <= 2)
276
315
* Both ap1 and ap2 are vectors or matrices
277
316
*/
278
317
l = CArray_DIM (ap1 , CArray_NDIM (ap1 ) - 1 );
@@ -321,13 +360,9 @@ clblas_matrixproduct(int typenum, CArray * ap1, CArray *ap2, CArray *out, Memory
321
360
* ((double * )CArray_DATA (ap1 ));
322
361
}
323
362
else if (ap1shape != _matrix ) {
324
- throw_notimplemented_exception ();
363
+ throw_not_implemented_exception ();
325
364
return NULL ;
326
- /**cblas_daxpy(l,
327
- *((double *)PyArray_DATA(ap2)),
328
- (double *)PyArray_DATA(ap1),
329
- ap1stride/sizeof(double),
330
- (double *)PyArray_DATA(out_buf), 1);*/
365
+ //cldaxpy(ap1, ap2, out_buf);
331
366
}
332
367
else {
333
368
int maxind , oind , i , a1s , outs ;
@@ -343,12 +378,9 @@ clblas_matrixproduct(int typenum, CArray * ap1, CArray *ap2, CArray *out, Memory
343
378
a1s = CArray_STRIDE (ap1 , maxind ) / sizeof (double );
344
379
outs = CArray_STRIDE (out_buf , maxind ) / sizeof (double );
345
380
for (i = 0 ; i < CArray_DIM (ap1 , oind ); i ++ ) {
346
- //cblas_daxpy(l, val, (double *)ptr, a1s,
347
- //(double *)optr, outs);
381
+ cldaxpy (l , val , (double * )ptr , a1s , (double * )optr , outs );
348
382
ptr += CArray_STRIDE (ap1 , oind );
349
383
optr += CArray_STRIDE (out_buf , oind );
350
- throw_notimplemented_exception ();
351
- return NULL ;
352
384
}
353
385
}
354
386
}
0 commit comments