Skip to content

Commit 5f7f042

Browse files
author
claudehang
authored
Add files via upload
1 parent bcc144d commit 5f7f042

21 files changed

+2458
-0
lines changed

PluginFactory.h

+825
Large diffs are not rendered by default.

activation_kernels.cu

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#include "activations.h"
2+
#include "cuda_yolo.h"
3+
#include "blas.h"
4+
5+
6+
7+
// Piecewise-linear "leaky hard tanh": identity on [0,1], slope .001 outside.
__device__ float lhtan_activate_kernel(float x)
{
    if (x > 1.f) return .001f * (x - 1.f) + 1.f;
    if (x < 0.f) return .001f * x;
    return x;
}
13+
14+
// Hard tanh: clamp x to [-1, 1] (NaN passes through unchanged, as in the
// original branch form where both comparisons are false).
__device__ float hardtan_activate_kernel(float x)
{
    return (x < -1.f) ? -1.f : ((x > 1.f) ? 1.f : x);
}
20+
21+
// Identity activation.
__device__ float linear_activate_kernel(float x){return x;}
// Sigmoid: 1 / (1 + e^-x), output in (0, 1).
__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
// Rescaled sigmoid mapped to (-1, 1): 2*sigmoid(x) - 1.
__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
// ReLU: max(0, x), written branchlessly via the bool-to-float product.
__device__ float relu_activate_kernel(float x){return x*(x>0);}
// ELU: x for x >= 0, e^x - 1 for x < 0 (branchless form).
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
// Leaky-ReLU variant with slope .01 on the negative side.
__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
// Ramp: ReLU plus a small linear term (.1*x) everywhere.
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
// Leaky ReLU with slope .1 on the negative side.
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
// tanh computed as 2*sigmoid(2x) - 1 — one expf call instead of two.
__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
30+
// PLSE — piecewise-linear approximation of a sigmoid:
// slope .125 through (0, .5) on [-4, 4], slope .01 in the saturated tails.
__device__ float plse_activate_kernel(float x)
{
    if (x > 4.f)  return .01f * (x - 4.f) + 1.f;
    if (x < -4.f) return .01f * (x + 4.f);
    return .125f * x + .5f;
}
36+
// Staircase activation: flat "steps" (floor(x/2)) on even integer intervals
// and a unit-slope ramp ((x - n) offset) on odd intervals.
// NOTE(review): for negative x, n is negative and C's truncated `%` yields
// n%2 in {0, -1}, so -1 still selects the ramp branch; presumably intended,
// but confirm against the CPU reference implementation for x < 0.
__device__ float stair_activate_kernel(float x)
{
    int n = floorf(x);
    if (n%2 == 0) return floorf(x/2);
    else return (x - n) + floorf(x/2);
}
42+
43+
// Dispatches an ACTIVATION selector to the matching device activation
// function. Covers every enumerator declared in activations.h; the trailing
// `return 0` is only reachable for an out-of-range selector value.
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate_kernel(x);
        case LOGISTIC:
            return logistic_activate_kernel(x);
        case LOGGY:
            return loggy_activate_kernel(x);
        case RELU:
            return relu_activate_kernel(x);
        case ELU:
            return elu_activate_kernel(x);
        case RELIE:
            return relie_activate_kernel(x);
        case RAMP:
            return ramp_activate_kernel(x);
        case LEAKY:
            return leaky_activate_kernel(x);
        case TANH:
            return tanh_activate_kernel(x);
        case PLSE:
            return plse_activate_kernel(x);
        case STAIR:
            return stair_activate_kernel(x);
        case HARDTAN:
            return hardtan_activate_kernel(x);
        case LHTAN:
            return lhtan_activate_kernel(x);
    }
    return 0;
}
75+
76+
// Applies activation `a` in place to each of the n elements of x.
// Flattens a (possibly 2D) grid of 1D blocks into a linear index and
// bounds-checks against n, since the grid rarely divides n exactly.
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
{
    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx < n) {
        x[idx] = activate_kernel(x[idx], a);
    }
}
81+
82+
// Host-side launcher: activates all n elements of device array x with
// activation a. The launch is asynchronous on the default stream; only the
// launch status is verified (cudaPeekAtLastError reads without clearing),
// not kernel completion.
void activate_array_gpu(float *x, int n, ACTIVATION a)
{
    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
    check_error(cudaPeekAtLastError());
}

activations.h

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#ifndef ACTIVATIONS_H_
#define ACTIVATIONS_H_
// NOTE: the previous guard __ACTIVATIONS_H_ used a leading double underscore,
// which is reserved for the implementation in C and C++.

// Activation selectors shared by the host launcher and the CUDA kernels.
typedef enum{
    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
} ACTIVATION;

// Applies activation `a` in place to the n-element device array x
// (asynchronous kernel launch on the default stream).
void activate_array_gpu(float* x,int n,ACTIVATION a);

#endif // ACTIVATIONS_H_

blas.h

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#ifndef BLAS_H_
#define BLAS_H_
// NOTE: the previous guard __BLAS_H_ used a leading double underscore, which
// is reserved for the implementation in C and C++.

// Strided device-to-device copy: Y[i*INCY] = X[i*INCX] for i in [0, N).
void copy_gpu(int N,float* X,int INCX,float* Y,int INCY);

// Fills every INCX-strided element of device array X with ALPHA.
void fill_gpu(int N, float ALPHA, float * X, int INCX);

#endif // BLAS_H_

blas_kernels.cu

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include <assert.h>
2+
3+
#include "cuda_yolo.h"
4+
#include "blas.h"
5+
6+
// Strided device copy: Y[OFFY + i*INCY] = X[OFFX + i*INCX] for i in [0, N).
// One element per thread; index flattened from a (possibly 2D) grid.
__global__ void copy_kernel(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
{
    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= N) return;
    Y[OFFY + idx * INCY] = X[OFFX + idx * INCX];
}
11+
12+
// Sets every INCX-strided element of X to ALPHA: X[i*INCX] = ALPHA, i in [0, N).
__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
{
    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= N) return;
    X[idx * INCX] = ALPHA;
}
17+
18+
// Launches copy_kernel over N elements with per-array offsets and strides.
// Asynchronous on the default stream; only the launch status is checked.
void copy_gpu_offset(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
{
    copy_kernel<<<cuda_gridsize(N),BLOCK>>>(N,X,OFFX,INCX,Y,OFFY,INCY);
    check_error(cudaPeekAtLastError());
}
23+
24+
// Zero-offset strided copy of N floats from device array X to device array Y.
void copy_gpu(int N,float* X,int INCX,float* Y,int INCY)
{
    copy_gpu_offset(N,X,0,INCX,Y,0,INCY);
}
28+
29+
30+
// Fills every INCX-strided element of device array X (N elements) with ALPHA.
// Asynchronous launch on the default stream; only the launch status is checked.
void fill_gpu(int N, float ALPHA, float * X, int INCX)
{
    fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

box.cpp

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#include "box.h"
2+
#include <stdio.h>
3+
#include <math.h>
4+
#include <stdlib.h>
5+
6+
int nms_comparator(const void *pa, const void *pb)
7+
{
8+
detection a = *(detection *)pa;
9+
detection b = *(detection *)pb;
10+
float diff = 0;
11+
if(b.sort_class >= 0){
12+
diff = a.prob[b.sort_class] - b.prob[b.sort_class];
13+
} else {
14+
diff = a.objectness - b.objectness;
15+
}
16+
if(diff < 0) return 1;
17+
else if(diff > 0) return -1;
18+
return 0;
19+
}
20+
21+
// Length of the 1D overlap of two segments given by center x and width w.
// Negative when the segments are disjoint.
float overlap(float x1, float w1, float x2, float w2)
{
    float left  = fmaxf(x1 - w1/2, x2 - w2/2);
    float right = fminf(x1 + w1/2, x2 + w2/2);
    return right - left;
}
31+
32+
// Area of the intersection of two boxes; 0 when they do not overlap on
// either axis.
float box_intersection(box a, box b)
{
    float w = overlap(a.x, a.w, b.x, b.w);
    float h = overlap(a.y, a.h, b.y, b.h);
    return (w < 0 || h < 0) ? 0 : w * h;
}
40+
41+
// Area of the union of two boxes by inclusion-exclusion:
// area(a) + area(b) - area(a ∩ b).
float box_union(box a, box b)
{
    float inter = box_intersection(a, b);
    return a.w * a.h + b.w * b.h - inter;
}
47+
48+
// Intersection over union of two boxes.
// NOTE(review): when both boxes have zero area the division is 0/0 → NaN;
// callers compare the result with `> thresh`, which is false for NaN.
float box_iou(box a, box b)
{
    return box_intersection(a, b)/box_union(a, b);
}
52+
53+
// Greedy per-class non-maximum suppression, in place.
// Phase 1 compacts the array: detections with objectness == 0 are swapped to
// the tail and excluded (the --i re-examines the element swapped in from the
// back). Phase 2, for each class k: sorts the surviving detections by
// prob[k] descending (nms_comparator keys on sort_class), then zeroes
// prob[k] of every detection whose IoU with a higher-ranked one exceeds
// thresh. The caller's `total` is not updated — suppressed boxes are marked
// by prob[k] == 0 rather than removed.
void do_nms_sort(detection *dets, int total, int classes, float thresh)
{
    int i, j, k;
    k = total-1;
    for(i = 0; i <= k; ++i){
        if(dets[i].objectness == 0){
            // Swap zero-objectness detection to the tail and shrink the range.
            detection swap = dets[i];
            dets[i] = dets[k];
            dets[k] = swap;
            --k;
            --i;  // re-check the element just swapped into slot i
        }
    }
    total = k+1;  // local only: count of detections surviving compaction

    for(k = 0; k < classes; ++k){
        for(i = 0; i < total; ++i){
            dets[i].sort_class = k;  // tells nms_comparator which class to key on
        }
        qsort(dets, total, sizeof(detection), nms_comparator);
        for(i = 0; i < total; ++i){
            if(dets[i].prob[k] == 0) continue;  // already suppressed for class k
            box a = dets[i].bbox;
            for(j = i+1; j < total; ++j){
                box b = dets[j].bbox;
                if (box_iou(a, b) > thresh){
                    dets[j].prob[k] = 0;  // suppress lower-scored overlapping box
                }
            }
        }
    }
}

box.h

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#ifndef BOX_H_
#define BOX_H_
// NOTE: the previous guard __BOX_H_ used a leading double underscore, which
// is reserved for the implementation in C and C++.
#include "yolo_layer.h"

// Greedy per-class non-maximum suppression over `total` detections: zeroes
// prob[k] of any box whose IoU with a higher-scoring box exceeds thresh.
void do_nms_sort(detection *dets, int total, int classes, float thresh);

#endif // BOX_H_

callTRTYOLOv3.cpp

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include "./yolov3_interface.h"
2+
#include "dlfcn.h"
3+
4+
#define LIB_TRTYOLOV3 "/data/home/claudehang/TensorRT-4.0.1.6/targets/x86_64-linux-gnu/samples/trt-yolo-320-interface/yolov3_interface.so"
5+
//#define LIB_TRTYOLOV3 "/usr/local/lib/yolov3_interface.so"
6+
7+
// Function-pointer types matching the symbols exported by the
// yolov3_interface shared library, resolved at runtime via dlsym in main().
// NOTE(review): the typedefs use std::string parameters, so the library must
// be built with a C++-compatible ABI even though lookup uses C-style names.
extern "C"
{
    typedef void (*F_trtYolov3Init)(int classes, std::string deployFile, std::string modelFile);
    typedef void (*F_trtYolov3Detect)(const std::string& img_name, const std::string& test_image_path, const std::string& output_folder);
    typedef void (*F_trtYolov3Free)();
}
13+
14+
// Demo driver: loads the TensorRT YOLOv3 interface library at runtime
// (dlopen/dlsym), initializes the detector, runs detection on every image
// named in image_list_file, then frees the detector and unloads the library.
// NOTE(review): all paths below are hard-coded to a developer machine, and
// gpu_id is declared but never used — presumably the library selects the
// device itself; confirm before deploying.
int main() {
    std::cout << "start loading interface" << std::endl;

    void *handle = dlopen(LIB_TRTYOLOV3,RTLD_LAZY);
    if(!handle)
    {
        printf("%s\n",dlerror());
        exit(EXIT_FAILURE);
    }

    char *error;
    dlerror();  // clear any stale error state before the dlsym calls

    F_trtYolov3Init trtYolov3Init = (F_trtYolov3Init)dlsym(handle,"trtYolov3Init");
    F_trtYolov3Detect trtYolov3Detect = (F_trtYolov3Detect)dlsym(handle,"trtYolov3Detect");
    F_trtYolov3Free trtYolov3Free = (F_trtYolov3Free)dlsym(handle,"trtYolov3Free");

    // a single dlerror() check covers all three lookups above
    if((error = dlerror()) != NULL)
    {
        printf("%s\n",error);
        exit(EXIT_FAILURE);
    }
    std::cout << "end of loading interface" << std::endl;

    // important parameters
    std::string deployFile = "/data/home/claudehang/TensorRT-4.0.1.6/bin/ca_man_yolov3.prototxt";
    std::string modelFile = "/data/home/claudehang/TensorRT-4.0.1.6/bin/ca_man_yolov3.caffemodel";

    std::string test_image_path = "/data/home/claudehang/TensorRT-4.0.1.6/bin/lol-images/";
    std::string image_list_file = "/data/home/claudehang/TensorRT-4.0.1.6/bin/lol-images/image_list.txt";
    std::string output_folder = "/data/home/claudehang/TensorRT-4.0.1.6/bin/lol-results/";

    int gpu_id = 1;  // NOTE(review): unused
    int clsNum = 1;  // number of object classes the model detects

    // read images in given directory
    std::ifstream img_list;
    img_list.open(image_list_file.data());
    if (!img_list)
    {
        std::cerr << image_list_file << " open error." << std::endl;
        exit(1);
    }
    std::string img_name;
    int count = 0;

    // initialize tensorrt model
    trtYolov3Init(clsNum, deployFile, modelFile);

    // do detection for each image
    while (getline(img_list, img_name)) {
        count++;
        //std::string imgFilename = test_image_path + img_name;
        std::cout << "YOLO on image < " << img_name << " >" << std::endl;
        // c_str() results are converted back to temporary std::strings to
        // match the const std::string& parameters of the typedef
        trtYolov3Detect(img_name.c_str(), test_image_path.c_str(), output_folder.c_str());
    }
    trtYolov3Free();
    dlclose(handle);
}

common.cpp

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#include "common.h"
2+
// Searches each directory in `directories` — and up to 10 "../"-prefixed
// ancestor paths of it — for `input`, returning the first path that opens.
// Asserts when nothing is found (with NDEBUG the assert compiles out and an
// empty string is returned instead).
std::string locateFile(const std::string& input, const std::vector<std::string> & directories)
{
    const int kMaxDepth = 10;
    std::string candidate;
    bool located = false;

    for (const auto& dir : directories)
    {
        candidate = dir + input;
        for (int depth = 0; depth < kMaxDepth; ++depth)
        {
            std::ifstream probe(candidate);
            if (probe.is_open())
            {
                located = true;
                break;
            }
            candidate = "../" + candidate;  // climb one directory and retry
        }
        if (located) break;
        candidate.clear();  // wipe the failed candidate before the next dir
    }

    assert(!candidate.empty() && "Could not find a file due to it not existing in the data directory.");
    return candidate;
}
24+
25+
// Reads the pixel payload of a binary PGM ("P5") file into `buffer`.
// Consumes the four whitespace-separated header tokens (magic, two
// dimensions, max value), skips the single whitespace byte that follows,
// then reads inH*inW raw bytes. `buffer` must hold at least inH*inW bytes.
void readPGMFile(const std::string& fileName, uint8_t *buffer, int inH, int inW)
{
    std::ifstream infile(fileName, std::ifstream::binary);
    assert(infile.is_open() && "Attempting to read from a file that is not open.");

    std::string magic, dim1, dim2, maxVal;
    infile >> magic >> dim1 >> dim2 >> maxVal;
    infile.seekg(1, infile.cur);  // step over the whitespace after the header
    infile.read(reinterpret_cast<char*>(buffer), inH*inW);
}

common.h

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#ifndef TRT_COMMON_H_
#define TRT_COMMON_H_
// NOTE: the previous guard _TRT_COMMON_H_ (underscore + capital) was a
// reserved identifier in C++.
#include "NvInfer.h"
#include <string>
#include <vector>
#include <fstream>
#include <cassert>
#include <iostream>
#include <cstdlib>   // abort()
#include <cstdint>   // uint8_t

// Aborts on any non-zero CUDA status code. Wrapped in do/while(0) so the
// macro behaves as a single statement (safe in braceless if/else), and the
// argument is parenthesized so expressions expand correctly.
#define CHECK(status)                                     \
    do                                                    \
    {                                                     \
        if ((status) != 0)                                \
        {                                                 \
            std::cout << "Cuda failure: " << (status);    \
            abort();                                      \
        }                                                 \
    } while (0)


// Logger for GIE info/warning/errors: forwards everything above INFO to
// stderr with a severity prefix.
class Logger : public nvinfer1::ILogger
{
public:
    void log(nvinfer1::ILogger::Severity severity, const char* msg) override
    {
        // suppress info-level messages
        if (severity == Severity::kINFO) return;

        switch (severity)
        {
        case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
        case Severity::kERROR: std::cerr << "ERROR: "; break;
        case Severity::kWARNING: std::cerr << "WARNING: "; break;
        case Severity::kINFO: std::cerr << "INFO: "; break;  // unreachable: filtered above
        default: std::cerr << "UNKNOWN: "; break;
        }
        std::cerr << msg << std::endl;
    }
};

// Searches `directories` (and up to 10 "../" ancestors of each) for `input`;
// asserts when the file cannot be found.
std::string locateFile(const std::string& input, const std::vector<std::string> & directories);
// Reads the pixel payload of a binary PGM file into `buffer` (inH*inW bytes).
void readPGMFile(const std::string& fileName, uint8_t *buffer, int inH, int inW);
#endif // TRT_COMMON_H_

0 commit comments

Comments
 (0)