-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The program is currently available for computing earthquake tsunami on 1st layer with linear solver.
- Loading branch information
Showing
27 changed files
with
15,477 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#ifndef SM_CONFG | ||
#define SM_CONFG | ||
#define LOAD_PER_SM 2 | ||
#define NUMSTREAM 4 | ||
extern cudaStream_t EXECstream[NUMSTREAM]; | ||
#endif | ||
|
||
#ifndef MOMT_KERNEL_CONFG | ||
#define MOMT_KERNEL_CONFG | ||
#define BLOCKX 16 // ==> along column axis | ||
#define EXECX 15 // BLOCKX-1 | ||
#define BLOCKY 16 // ==> along row axis | ||
#define EXECY 15// BLOCKY-1 | ||
|
||
extern dim3 DimBlockMomt; | ||
extern dim3 DimGridMomt; | ||
|
||
#define BLOCKX_MOMT 64 | ||
extern dim3 DimBlockMomt_MN; | ||
extern dim3 DimGridMomt_MN; | ||
#endif | ||
|
||
#ifndef MASS_KERNEL_CONFG | ||
#define MASS_KERNEL_CONFG | ||
#define BLOCKX_MASS 64 | ||
extern dim3 DimBlockMass; | ||
extern dim3 DimGridMass; | ||
#endif | ||
|
||
#ifndef OPENBD_KERNEL_CONFG | ||
#define OPENBD_KERNEL_CONFG | ||
#define BLOCKX_OPENBD 64 | ||
extern dim3 DimBlockOpenBD; | ||
extern dim3 DimGridOpenBD_LR; | ||
extern dim3 DimGridOpenBD_TB; | ||
#endif | ||
|
||
#ifndef MAXAMP_KERNEL_CONFG | ||
#define MAXAMP_KERNEL_CONFG | ||
#define MAXAMP_BLOCK 512 | ||
extern size_t GridMaxAmp; | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#include <stdio.h> | ||
#include "cuda.h" | ||
#include <time.h> | ||
|
||
//#define DEBUG | ||
#ifdef DEBUG | ||
#ifndef DEBUG_FUNC | ||
#define DEBUG_FUNC | ||
#define ERROR 1.0e-4 | ||
#define CHKR 720 | ||
#define CHKC 304 | ||
#define CHKSI 10 | ||
#define ID_hst(row,col) col*size_hst[0] + row | ||
//#define ID2E_hst(row,col,dim) size_hst[2]*dim + ID(row,col) | ||
extern float* tmpout; | ||
#endif | ||
#endif | ||
|
||
#ifndef CONSTS | ||
#define CONSTS | ||
#define GX 1.0e-5 | ||
#define EPS 1.0e-10 | ||
#define TWLVTH 0.0833333333333333333 | ||
#define GRAV 9.807 | ||
#endif | ||
|
||
|
||
#ifndef CUDA_CHK | ||
#define CUDA_CHK | ||
#define cudaCHK(FUN) ({\ | ||
if ((FUN) != cudaSuccess) {\ | ||
printf("%s in %s at line %d\n",cudaGetErrorString(cudaGetLastError()), __FILE__,__LINE__);\ | ||
exit(EXIT_FAILURE);\ | ||
}\ | ||
}) | ||
#define cudaERROR(err) ({\ | ||
if (err != cudaSuccess) {\ | ||
printf("error code: %d\n", err);\ | ||
printf("%s in %s at line %d\n",cudaGetErrorString(err), __FILE__,__LINE__);\ | ||
exit(EXIT_FAILURE);\ | ||
}\ | ||
}) | ||
#endif | ||
|
||
#ifndef CUDA_KERNEL | ||
#define CUDA_KERNEL | ||
#define ID(row,col) (col)*size_dev[0] + row | ||
#define ID2E(row,col,dim) size_dev[2]*(dim) + ID(row,col) | ||
#endif | ||
|
||
|
||
#ifndef CUDA_GLOB_VAR | ||
extern float *Zout_hst, *MNout_hst; | ||
extern float *MNdat_hst, *Zdat_hst; | ||
extern float *R24_hst, *R35_hst, *H_hst; | ||
//float *R1_hst, *R6_hst, *R11_hst; | ||
extern float *R_MASS_hst; | ||
|
||
// extern __device__ float *R35_dev; | ||
// extern __device__ float *R24_dev, *H_dev; | ||
// extern __device__ float *Z_dat_dev, *MN_dat_dev; | ||
extern __device__ float *MN_out_dev, *Z_out_dev; | ||
extern __constant__ __device__ uint32_t size_dev[4]; | ||
// extern texture <float, cudaTextureType2D, cudaReadModeElementType> ZtexRef; | ||
// extern texture <float, cudaTextureType2D, cudaReadModeElementType> MtexRef; | ||
// extern texture <float, cudaTextureType2D, cudaReadModeElementType> NtexRef; | ||
extern float *Zmax_hst; | ||
extern uint32_t size_hst[4]; | ||
extern cudaDeviceProp dev_prop; | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#include "GPUHeader.h" | ||
#include "GPUConfig.h" | ||
|
||
extern "C" void mass_launch_(const float*, float*, const float*); | ||
__global__ void mass_kernel(const float* __restrict__,const float* __restrict__, | ||
const float* __restrict__,const float* __restrict__); | ||
|
||
|
||
|
||
extern "C" void mass_launch_(const float* Z_f, float* Z_f_complete, const float *H_f){ | ||
cudaError_t err; | ||
clock_t st, fi; | ||
|
||
// cudaCHK( cudaMemcpy(H_hst, H_f, size_hst[3], cudaMemcpyHostToDevice) ); | ||
//cudaCHK( cudaMemcpy(Zdat_hst, Z_f, size_hst[3], cudaMemcpyHostToDevice) ); | ||
|
||
st = clock(); | ||
mass_kernel <<< DimGridMass, DimBlockMass >>> (Zdat_hst, MNdat_hst, R_MASS_hst, H_hst);// FUTURE MULTIPLE KERNELS | ||
cudaDeviceSynchronize(); | ||
err = cudaGetLastError(); | ||
cudaERROR(err); | ||
fi = clock(); | ||
|
||
|
||
#ifdef DEBUG | ||
printf("TIME SPENT ON GPU %f\n",(float)(fi-st)/CLOCKS_PER_SEC); | ||
// printf("printing information for debugging\n" ); | ||
// cudaCHK( cudaMemcpy(tmpout, Zout_hst, size_hst[3], cudaMemcpyDeviceToHost) ); | ||
// for (size_t i = 0; i < size_hst[2]; i++) { | ||
// if (abs(tmpout[i] - Z_f_complete[i]) > ERROR) { | ||
// printf("Z[%d] Z_cu:%e Z_f:%e %e\n", i, tmpout[i], Z_f_complete[i], tmpout[i] - Z_f_complete[i]); | ||
// } | ||
// } | ||
#else | ||
// cudaCHK( cudaMemcpy(Z_f_complete, Zout_hst, size_hst[3], cudaMemcpyDeviceToHost) ); | ||
#endif | ||
} | ||
|
||
__global__ void mass_kernel(const float* __restrict__ Z, const float* __restrict__ MN, | ||
const float* __restrict__ R_MASS, const float* __restrict__ H){ | ||
/*+-->+-->+---->| | ||
+-->+-->+---->| | ||
+-->+-->+---->| | ||
+-->+-->+---->| | ||
*/ | ||
//designed for architectures whose warpsize=32 | ||
uint32_t row = blockIdx.x*31*(blockDim.x>>5) + 31*(threadIdx.x>>5) + threadIdx.x%32; | ||
uint32_t col = blockIdx.y*(size_dev[1]/gridDim.y); | ||
uint32_t col_end = (blockIdx.y == gridDim.y-1)? size_dev[1]-1:(blockIdx.y+1)*(size_dev[1]/gridDim.y)+1; | ||
float h,z; | ||
float m, m_suf; | ||
float n, n_prev; | ||
float ztmp; | ||
float r1, r11; | ||
float r6, r6_prev; | ||
|
||
n_prev = MN[ID2E(row,col,1)]; | ||
r6_prev = R_MASS[col*4+1]; | ||
|
||
for (uint32_t i = col+1; i < col_end; i++) { | ||
if (threadIdx.x%32 == 0) { | ||
r1 = R_MASS[i*4]; | ||
r6 = R_MASS[i*4+1]; | ||
r11 = R_MASS[i*4+2]; | ||
} | ||
__syncwarp(); | ||
r1 = __shfl_sync(0xFFFFFFFF,r1,0); | ||
r6 = __shfl_sync(0xFFFFFFFF,r6,0); | ||
r11 = __shfl_sync(0xFFFFFFFF,r11,0); | ||
m = MN[ID(row,i)]; | ||
h = H[ID(row,i)]; | ||
z = Z[ID(row,i)]; | ||
n = MN[ID2E(row,i,1)]; | ||
m_suf = __shfl_up_sync(0xFFFFFFFF,m,1); | ||
if (threadIdx.x%32 != 0 && row < size_dev[0]-1) { | ||
ztmp = z - r1*(m-m_suf) - r11*(n*r6-n_prev*r6_prev); | ||
if (ztmp + h <= EPS) ztmp = -h; | ||
if (h <= GX || (ztmp < EPS && -ztmp < EPS) ) ztmp = 0.0; | ||
Z_out_dev[ID(row,i)] = ztmp; | ||
|
||
r6_prev = r6; | ||
n_prev = n; | ||
} | ||
} | ||
} |
Oops, something went wrong.