@@ -469,31 +469,31 @@ int main(int argc, char **argv)
469
469
CL_SAFE_CALL ( clSetKernelArg (R1W1Kernel , 4 , sizeof (cl_int ), (void * ) & dim_x ) );
470
470
CL_SAFE_CALL ( clSetKernelArg (R1W1Kernel , 5 , sizeof (cl_int ), (void * ) & halo ) );
471
471
472
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
473
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
474
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceD ) );
475
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 3 , sizeof (cl_int ), (void * ) & pad ) );
476
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 4 , sizeof (cl_int ), (void * ) & pad_x ) );
477
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 5 , sizeof (cl_int ), (void * ) & dim_x ) );
478
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 6 , sizeof (cl_int ), (void * ) & halo ) );
479
-
480
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
481
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
482
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceC ) );
483
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 3 , sizeof (cl_mem ), (void * ) & deviceD ) );
484
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 4 , sizeof (cl_int ), (void * ) & pad ) );
485
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 5 , sizeof (cl_int ), (void * ) & pad_x ) );
486
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 6 , sizeof (cl_int ), (void * ) & dim_x ) );
487
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 7 , sizeof (cl_int ), (void * ) & halo ) );
488
-
489
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
490
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
491
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceC ) );
492
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 3 , sizeof (cl_mem ), (void * ) & deviceD ) );
493
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 4 , sizeof (cl_int ), (void * ) & pad ) );
494
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 5 , sizeof (cl_int ), (void * ) & pad_x ) );
495
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 6 , sizeof (cl_int ), (void * ) & dim_x ) );
496
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 7 , sizeof (cl_int ), (void * ) & halo ) );
472
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
473
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
474
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceD ) );
475
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 3 , sizeof (cl_int ), (void * ) & pad ) );
476
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 4 , sizeof (cl_int ), (void * ) & pad_x ) );
477
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 5 , sizeof (cl_int ), (void * ) & dim_x ) );
478
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 6 , sizeof (cl_int ), (void * ) & halo ) );
479
+
480
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
481
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
482
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceC ) );
483
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 3 , sizeof (cl_mem ), (void * ) & deviceD ) );
484
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 4 , sizeof (cl_int ), (void * ) & pad ) );
485
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 5 , sizeof (cl_int ), (void * ) & pad_x ) );
486
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 6 , sizeof (cl_int ), (void * ) & dim_x ) );
487
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 7 , sizeof (cl_int ), (void * ) & halo ) );
488
+
489
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
490
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
491
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceC ) );
492
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 3 , sizeof (cl_mem ), (void * ) & deviceD ) );
493
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 4 , sizeof (cl_int ), (void * ) & pad ) );
494
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 5 , sizeof (cl_int ), (void * ) & pad_x ) );
495
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 6 , sizeof (cl_int ), (void * ) & dim_x ) );
496
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 7 , sizeof (cl_int ), (void * ) & halo ) );
497
497
#else
498
498
long loop_exit = (long )(BLOCK_X / VEC ) * (long )num_blk_x * (long )dim_y ;
499
499
@@ -506,37 +506,37 @@ int main(int argc, char **argv)
506
506
CL_SAFE_CALL ( clSetKernelArg (R1W1Kernel , 6 , sizeof (cl_long ), (void * ) & loop_exit ) );
507
507
CL_SAFE_CALL ( clSetKernelArg (R1W1Kernel , 7 , sizeof (cl_int ), (void * ) & halo ) );
508
508
509
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
510
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
511
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceD ) );
512
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 3 , sizeof (cl_int ), (void * ) & pad ) );
513
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 4 , sizeof (cl_int ), (void * ) & pad_x ) );
514
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 5 , sizeof (cl_int ), (void * ) & dim_x ) );
515
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 6 , sizeof (cl_int ), (void * ) & dim_y ) );
516
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 7 , sizeof (cl_long ), (void * ) & loop_exit ) );
517
- CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 8 , sizeof (cl_int ), (void * ) & halo ) );
518
-
519
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
520
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
521
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceC ) );
522
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 3 , sizeof (cl_mem ), (void * ) & deviceD ) );
523
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 4 , sizeof (cl_int ), (void * ) & pad ) );
524
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 5 , sizeof (cl_int ), (void * ) & pad_x ) );
525
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 6 , sizeof (cl_int ), (void * ) & dim_x ) );
526
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 7 , sizeof (cl_int ), (void * ) & dim_y ) );
527
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 8 , sizeof (cl_long ), (void * ) & loop_exit ) );
528
- CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 9 , sizeof (cl_int ), (void * ) & halo ) );
529
-
530
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
531
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
532
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceC ) );
533
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 3 , sizeof (cl_mem ), (void * ) & deviceD ) );
534
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 4 , sizeof (cl_int ), (void * ) & pad ) );
535
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 5 , sizeof (cl_int ), (void * ) & pad_x ) );
536
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 6 , sizeof (cl_int ), (void * ) & dim_x ) );
537
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 7 , sizeof (cl_int ), (void * ) & dim_y ) );
538
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 8 , sizeof (cl_long ), (void * ) & loop_exit ) );
539
- CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 9 , sizeof (cl_int ), (void * ) & halo ) );
509
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
510
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
511
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceD ) );
512
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 3 , sizeof (cl_int ), (void * ) & pad ) );
513
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 4 , sizeof (cl_int ), (void * ) & pad_x ) );
514
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 5 , sizeof (cl_int ), (void * ) & dim_x ) );
515
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 6 , sizeof (cl_int ), (void * ) & dim_y ) );
516
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 7 , sizeof (cl_long ), (void * ) & loop_exit ) );
517
+ CL_SAFE_CALL ( clSetKernelArg (R2W1Kernel , 8 , sizeof (cl_int ), (void * ) & halo ) );
518
+
519
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
520
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
521
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceC ) );
522
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 3 , sizeof (cl_mem ), (void * ) & deviceD ) );
523
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 4 , sizeof (cl_int ), (void * ) & pad ) );
524
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 5 , sizeof (cl_int ), (void * ) & pad_x ) );
525
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 6 , sizeof (cl_int ), (void * ) & dim_x ) );
526
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 7 , sizeof (cl_int ), (void * ) & dim_y ) );
527
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 8 , sizeof (cl_long ), (void * ) & loop_exit ) );
528
+ CL_SAFE_CALL ( clSetKernelArg (R3W1Kernel , 9 , sizeof (cl_int ), (void * ) & halo ) );
529
+
530
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 0 , sizeof (cl_mem ), (void * ) & deviceA ) );
531
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 1 , sizeof (cl_mem ), (void * ) & deviceB ) );
532
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 2 , sizeof (cl_mem ), (void * ) & deviceC ) );
533
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 3 , sizeof (cl_mem ), (void * ) & deviceD ) );
534
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 4 , sizeof (cl_int ), (void * ) & pad ) );
535
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 5 , sizeof (cl_int ), (void * ) & pad_x ) );
536
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 6 , sizeof (cl_int ), (void * ) & dim_x ) );
537
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 7 , sizeof (cl_int ), (void * ) & dim_y ) );
538
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 8 , sizeof (cl_long ), (void * ) & loop_exit ) );
539
+ CL_SAFE_CALL ( clSetKernelArg (R2W2Kernel , 9 , sizeof (cl_int ), (void * ) & halo ) );
540
540
#endif
541
541
#elif CHBLK2D
542
542
int valid_blk_x = BLOCK_X - 2 * halo ;
0 commit comments