Skip to content

Commit 195ae3d

Browse files
committed
merge from planB
2 parents 7243c4f + a27eb65 commit 195ae3d

File tree

6 files changed

+497
-8
lines changed

6 files changed

+497
-8
lines changed

.gitignore

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,2 @@
1-
*.i
2-
*.ii
3-
*.gpu
4-
*.ptx
5-
*.cubin
6-
*.fatbin
1+
*.o
2+
*.x

Makefile

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Builds main.x from the three CUDA translation units listed in $(objects).
# CUDA runtime, cuBLAS, cuSOLVER and cuRAND are linked from the default
# /usr/local/cuda install location.
LIBS+=-L/usr/local/cuda/lib64 -lcudart -lcublas -lcusolver -lcurand

#objects:=cusolver_kernel_wrapper.o cusolver_kernel.o main.o
objects:=cusolver_kernel.o curand_kernel.o main.o

# all and clean name actions, not files; declare them phony so a stray file
# called "all" or "clean" cannot silently disable the target.
.PHONY: all clean

all: main.x

main.x: $(objects)
	nvcc -g -O3 -o $@ $^ $(LDFLAGS) $(LIBS)

# Device code is generated for compute capability 7.0 (sm_70) only.
cusolver_kernel.o: cusolver_kernel.cu
	nvcc -g -O2 -gencode=arch=compute_70,code=sm_70 -c -o $@ $^

curand_kernel.o: curand_kernel.cu
	nvcc -g -O2 -gencode=arch=compute_70,code=sm_70 -c -o $@ $^

main.o: main.cu
	nvcc -g -O2 -gencode=arch=compute_70,code=sm_70 -c -o $@ $^

# Generic rule for plain C sources (compiled with the Intel compiler).
%.o : %.c
	icc -c -o $*.o $<

# Remove everything the build produces, including the linked executable.
clean:
	rm -f ./cu*_kernel_f *.mod *.o main.x

README.md

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
1-
# cusolver_stream
2-
test stream of cusolver
1+
# Introduction
2+
3+
A demo of using CUDA streams with cuSOLVER to solve the eigenvalue problems of a large number of small matrices.
4+
5+
# Conclusion
6+
7+
After analysis with Nsight Systems, I found that neither the Jacobi method nor the QR method is able to make use of multiple streams, because both functions perform unavoidable pageable memory copies. This is different from [cublas](https://github.com/zheliu137/cublas_stream).

curand_kernel.cu

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#include <cstdio>
2+
#include <cstdlib>
3+
#include <algorithm>
4+
5+
#include <cuda_runtime.h>
6+
#include <curand.h>
7+
//#include "cuda_settings.h"
8+
9+
/*
 * Fill a caller-provided host buffer with size*size uniformly distributed
 * double-precision random numbers generated on the GPU with cuRAND.
 *
 * size           Edge length of the square matrix; size*size values are
 *                written. A non-positive size is a no-op.
 * h_randomArray  Host buffer with room for at least size*size doubles. It
 *                must be allocated by the caller: the pointer is passed by
 *                value, so an allocation made in here could not be returned.
 *                A NULL pointer is a no-op.
 *
 * The generator is XORWOW seeded from the wall clock, so the values differ
 * between runs. On any CUDA/cuRAND failure a message is printed to stderr,
 * the temporary device buffer and generator are released, and the function
 * returns; the contents of h_randomArray are then unspecified.
 */
void createRandoms(int size, double *h_randomArray){
    if (size <= 0 || h_randomArray == NULL) {
        return;
    }

    // Widen before multiplying: size*size overflows int once size > 46340.
    const size_t count = (size_t)size * (size_t)size;
    const size_t bytes = count * sizeof(double);

    double *d_randomArray = NULL;
    cudaError_t cudaStatus = cudaMalloc((void**)&d_randomArray, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "createRandoms: cudaMalloc failed: %s\n",
                cudaGetErrorString(cudaStatus));
        return;
    }

    curandGenerator_t generator;
    curandStatus_t randStatus =
        curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_XORWOW);
    if (randStatus != CURAND_STATUS_SUCCESS) {
        fprintf(stderr, "createRandoms: curandCreateGenerator failed (status %d)\n",
                (int)randStatus);
        cudaFree(d_randomArray);
        return;
    }

    // The seed parameter is unsigned long long; convert directly rather than
    // through int so the value is not truncated or sign-extended.
    randStatus = curandSetPseudoRandomGeneratorSeed(
        generator, (unsigned long long)time(NULL));
    if (randStatus == CURAND_STATUS_SUCCESS) {
        randStatus = curandGenerateUniformDouble(generator, d_randomArray, count);
    }

    if (randStatus != CURAND_STATUS_SUCCESS) {
        fprintf(stderr, "createRandoms: cuRAND generation failed (status %d)\n",
                (int)randStatus);
    } else {
        // Blocking copy: returns only after the data is in h_randomArray.
        cudaStatus = cudaMemcpy(h_randomArray, d_randomArray, bytes,
                                cudaMemcpyDeviceToHost);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "createRandoms: cudaMemcpy failed: %s\n",
                    cudaGetErrorString(cudaStatus));
        }
    }

    // Release the per-call resources on every path that reaches this point;
    // the original leaked both the generator and the device buffer.
    curandDestroyGenerator(generator);
    cudaFree(d_randomArray);
}

0 commit comments

Comments
 (0)