Skip to content

Commit 5f7f042

Browse files
author
claudehang
authored
Add files via upload
1 parent bcc144d commit 5f7f042

21 files changed

+2458
-0
lines changed

PluginFactory.h

+825
Large diffs are not rendered by default.

activation_kernels.cu

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#include "activations.h"
2+
#include "cuda_yolo.h"
3+
#include "blas.h"
4+
5+
6+
7+
// Piecewise-linear "leaky hard tanh": identity on [0,1], slope .001 outside.
__device__ float lhtan_activate_kernel(float x)
{
    if (x > 1.f) return .001f * (x - 1.f) + 1.f;
    if (x < 0.f) return .001f * x;
    return x;
}
13+
14+
// Hard tanh: clamp x to [-1, 1] (NaN passes through unchanged, as in the
// original branch form where both comparisons are false).
__device__ float hardtan_activate_kernel(float x)
{
    return (x < -1.f) ? -1.f : ((x > 1.f) ? 1.f : x);
}
20+
21+
// Identity activation.
__device__ float linear_activate_kernel(float x){return x;}
// Sigmoid: 1 / (1 + e^-x), output in (0, 1).
__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
// Rescaled sigmoid mapped to (-1, 1): 2*sigmoid(x) - 1.
__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
// ReLU: max(0, x), written branchlessly via the bool-to-float product.
__device__ float relu_activate_kernel(float x){return x*(x>0);}
// ELU: x for x >= 0, e^x - 1 for x < 0 (branchless form).
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
// Leaky-ReLU variant with slope .01 on the negative side.
__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
// Ramp: ReLU plus a small linear term (.1*x) everywhere.
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
// Leaky ReLU with slope .1 on the negative side.
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
// tanh computed as 2*sigmoid(2x) - 1 — one expf call instead of two.
__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
30+
// PLSE — piecewise-linear approximation of a sigmoid:
// slope .125 through (0, .5) on [-4, 4], slope .01 in the saturated tails.
__device__ float plse_activate_kernel(float x)
{
    if (x > 4.f)  return .01f * (x - 4.f) + 1.f;
    if (x < -4.f) return .01f * (x + 4.f);
    return .125f * x + .5f;
}
36+
// Staircase activation: flat "steps" (floor(x/2)) on even integer intervals
// and a unit-slope ramp ((x - n) offset) on odd intervals.
// NOTE(review): for negative x, n is negative and C's truncated `%` yields
// n%2 in {0, -1}, so -1 still selects the ramp branch; presumably intended,
// but confirm against the CPU reference implementation for x < 0.
__device__ float stair_activate_kernel(float x)
{
    int n = floorf(x);
    if (n%2 == 0) return floorf(x/2);
    else return (x - n) + floorf(x/2);
}
42+
43+
// Dispatches an ACTIVATION selector to the matching device activation
// function. Covers every enumerator declared in activations.h; the trailing
// `return 0` is only reachable for an out-of-range selector value.
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate_kernel(x);
        case LOGISTIC:
            return logistic_activate_kernel(x);
        case LOGGY:
            return loggy_activate_kernel(x);
        case RELU:
            return relu_activate_kernel(x);
        case ELU:
            return elu_activate_kernel(x);
        case RELIE:
            return relie_activate_kernel(x);
        case RAMP:
            return ramp_activate_kernel(x);
        case LEAKY:
            return leaky_activate_kernel(x);
        case TANH:
            return tanh_activate_kernel(x);
        case PLSE:
            return plse_activate_kernel(x);
        case STAIR:
            return stair_activate_kernel(x);
        case HARDTAN:
            return hardtan_activate_kernel(x);
        case LHTAN:
            return lhtan_activate_kernel(x);
    }
    return 0;
}
75+
76+
// Applies activation `a` in place to each of the n elements of x.
// Flattens a (possibly 2D) grid of 1D blocks into a linear index and
// bounds-checks against n, since the grid rarely divides n exactly.
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
{
    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx < n) {
        x[idx] = activate_kernel(x[idx], a);
    }
}
81+
82+
// Host-side launcher: activates all n elements of device array x with
// activation a. The launch is asynchronous on the default stream; only the
// launch status is verified (cudaPeekAtLastError reads without clearing),
// not kernel completion.
void activate_array_gpu(float *x, int n, ACTIVATION a)
{
    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
    check_error(cudaPeekAtLastError());
}

activations.h

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#ifndef ACTIVATIONS_H_
#define ACTIVATIONS_H_
// NOTE: the previous guard __ACTIVATIONS_H_ used a leading double underscore,
// which is reserved for the implementation in C and C++.

// Activation selectors shared by the host launcher and the CUDA kernels.
typedef enum{
    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
} ACTIVATION;

// Applies activation `a` in place to the n-element device array x
// (asynchronous kernel launch on the default stream).
void activate_array_gpu(float* x,int n,ACTIVATION a);

#endif // ACTIVATIONS_H_

blas.h

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#ifndef BLAS_H_
#define BLAS_H_
// NOTE: the previous guard __BLAS_H_ used a leading double underscore, which
// is reserved for the implementation in C and C++.

// Strided device-to-device copy: Y[i*INCY] = X[i*INCX] for i in [0, N).
void copy_gpu(int N,float* X,int INCX,float* Y,int INCY);

// Fills every INCX-strided element of device array X with ALPHA.
void fill_gpu(int N, float ALPHA, float * X, int INCX);

#endif // BLAS_H_

blas_kernels.cu

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include <assert.h>
2+
3+
#include "cuda_yolo.h"
4+
#include "blas.h"
5+
6+
// Strided device copy: Y[OFFY + i*INCY] = X[OFFX + i*INCX] for i in [0, N).
// One element per thread; index flattened from a (possibly 2D) grid.
__global__ void copy_kernel(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
{
    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= N) return;
    Y[OFFY + idx * INCY] = X[OFFX + idx * INCX];
}
11+
12+
// Sets every INCX-strided element of X to ALPHA: X[i*INCX] = ALPHA, i in [0, N).
__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
{
    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= N) return;
    X[idx * INCX] = ALPHA;
}
17+
18+
// Launches copy_kernel over N elements with per-array offsets and strides.
// Asynchronous on the default stream; only the launch status is checked.
void copy_gpu_offset(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
{
    copy_kernel<<<cuda_gridsize(N),BLOCK>>>(N,X,OFFX,INCX,Y,OFFY,INCY);
    check_error(cudaPeekAtLastError());
}
23+
24+
// Zero-offset strided copy of N floats from device array X to device array Y.
void copy_gpu(int N,float* X,int INCX,float* Y,int INCY)
{
    copy_gpu_offset(N,X,0,INCX,Y,0,INCY);
}
28+
29+
30+
// Fills every INCX-strided element of device array X (N elements) with ALPHA.
// Asynchronous launch on the default stream; only the launch status is checked.
void fill_gpu(int N, float ALPHA, float * X, int INCX)
{
    fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

box.cpp

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#include "box.h"
2+
#include <stdio.h>
3+
#include <math.h>
4+
#include <stdlib.h>
5+
6+
int nms_comparator(const void *pa, const void *pb)
7+
{
8+
detection a = *(detection *)pa;
9+
detection b = *(detection *)pb;
10+
float diff = 0;
11+
if(b.sort_class >= 0){
12+
diff = a.prob[b.sort_class] - b.prob[b.sort_class];
13+
} else {
14+
diff = a.objectness - b.objectness;
15+
}
16+
if(diff < 0) return 1;
17+
else if(diff > 0) return -1;
18+
return 0;
19+
}
20+
21+
// Length of the 1D overlap of two segments given by center x and width w.
// Negative when the segments are disjoint.
float overlap(float x1, float w1, float x2, float w2)
{
    float left  = fmaxf(x1 - w1/2, x2 - w2/2);
    float right = fminf(x1 + w1/2, x2 + w2/2);
    return right - left;
}
31+
32+
// Area of the intersection of two boxes; 0 when they do not overlap on
// either axis.
float box_intersection(box a, box b)
{
    float w = overlap(a.x, a.w, b.x, b.w);
    float h = overlap(a.y, a.h, b.y, b.h);
    return (w < 0 || h < 0) ? 0 : w * h;
}
40+
41+
// Area of the union of two boxes by inclusion-exclusion:
// area(a) + area(b) - area(a ∩ b).
float box_union(box a, box b)
{
    float inter = box_intersection(a, b);
    return a.w * a.h + b.w * b.h - inter;
}
47+
48+
// Intersection over union of two boxes.
// NOTE(review): when both boxes have zero area the division is 0/0 → NaN;
// callers compare the result with `> thresh`, which is false for NaN.
float box_iou(box a, box b)
{
    return box_intersection(a, b)/box_union(a, b);
}
52+
53+
// Greedy per-class non-maximum suppression, in place.
// Phase 1 compacts the array: detections with objectness == 0 are swapped to
// the tail and excluded (the --i re-examines the element swapped in from the
// back). Phase 2, for each class k: sorts the surviving detections by
// prob[k] descending (nms_comparator keys on sort_class), then zeroes
// prob[k] of every detection whose IoU with a higher-ranked one exceeds
// thresh. The caller's `total` is not updated — suppressed boxes are marked
// by prob[k] == 0 rather than removed.
void do_nms_sort(detection *dets, int total, int classes, float thresh)
{
    int i, j, k;
    k = total-1;
    for(i = 0; i <= k; ++i){
        if(dets[i].objectness == 0){
            // Swap zero-objectness detection to the tail and shrink the range.
            detection swap = dets[i];
            dets[i] = dets[k];
            dets[k] = swap;
            --k;
            --i;  // re-check the element just swapped into slot i
        }
    }
    total = k+1;  // local only: count of detections surviving compaction

    for(k = 0; k < classes; ++k){
        for(i = 0; i < total; ++i){
            dets[i].sort_class = k;  // tells nms_comparator which class to key on
        }
        qsort(dets, total, sizeof(detection), nms_comparator);
        for(i = 0; i < total; ++i){
            if(dets[i].prob[k] == 0) continue;  // already suppressed for class k
            box a = dets[i].bbox;
            for(j = i+1; j < total; ++j){
                box b = dets[j].bbox;
                if (box_iou(a, b) > thresh){
                    dets[j].prob[k] = 0;  // suppress lower-scored overlapping box
                }
            }
        }
    }
}

box.h

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#ifndef BOX_H_
#define BOX_H_
// NOTE: the previous guard __BOX_H_ used a leading double underscore, which
// is reserved for the implementation in C and C++.
#include "yolo_layer.h"

// Greedy per-class non-maximum suppression over `total` detections: zeroes
// prob[k] of any box whose IoU with a higher-scoring box exceeds thresh.
void do_nms_sort(detection *dets, int total, int classes, float thresh);

#endif // BOX_H_

callTRTYOLOv3.cpp

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include "./yolov3_interface.h"
2+
#include "dlfcn.h"
3+
4+
#define LIB_TRTYOLOV3 "/data/home/claudehang/TensorRT-4.0.1.6/targets/x86_64-linux-gnu/samples/trt-yolo-320-interface/yolov3_interface.so"
5+
//#define LIB_TRTYOLOV3 "/usr/local/lib/yolov3_interface.so"
6+
7+
// Function-pointer types matching the symbols exported by the
// yolov3_interface shared library, resolved at runtime via dlsym in main().
// NOTE(review): the typedefs use std::string parameters, so the library must
// be built with a C++-compatible ABI even though lookup uses C-style names.
extern "C"
{
    typedef void (*F_trtYolov3Init)(int classes, std::string deployFile, std::string modelFile);
    typedef void (*F_trtYolov3Detect)(const std::string& img_name, const std::string& test_image_path, const std::string& output_folder);
    typedef void (*F_trtYolov3Free)();
}
13+
14+
// Demo driver: loads the TensorRT YOLOv3 interface library at runtime
// (dlopen/dlsym), initializes the detector, runs detection on every image
// named in image_list_file, then frees the detector and unloads the library.
// NOTE(review): all paths below are hard-coded to a developer machine, and
// gpu_id is declared but never used — presumably the library selects the
// device itself; confirm before deploying.
int main() {
    std::cout << "start loading interface" << std::endl;

    void *handle = dlopen(LIB_TRTYOLOV3,RTLD_LAZY);
    if(!handle)
    {
        printf("%s\n",dlerror());
        exit(EXIT_FAILURE);
    }

    char *error;
    dlerror();  // clear any stale error state before the dlsym calls

    F_trtYolov3Init trtYolov3Init = (F_trtYolov3Init)dlsym(handle,"trtYolov3Init");
    F_trtYolov3Detect trtYolov3Detect = (F_trtYolov3Detect)dlsym(handle,"trtYolov3Detect");
    F_trtYolov3Free trtYolov3Free = (F_trtYolov3Free)dlsym(handle,"trtYolov3Free");

    // a single dlerror() check covers all three lookups above
    if((error = dlerror()) != NULL)
    {
        printf("%s\n",error);
        exit(EXIT_FAILURE);
    }
    std::cout << "end of loading interface" << std::endl;

    // important parameters
    std::string deployFile = "/data/home/claudehang/TensorRT-4.0.1.6/bin/ca_man_yolov3.prototxt";
    std::string modelFile = "/data/home/claudehang/TensorRT-4.0.1.6/bin/ca_man_yolov3.caffemodel";

    std::string test_image_path = "/data/home/claudehang/TensorRT-4.0.1.6/bin/lol-images/";
    std::string image_list_file = "/data/home/claudehang/TensorRT-4.0.1.6/bin/lol-images/image_list.txt";
    std::string output_folder = "/data/home/claudehang/TensorRT-4.0.1.6/bin/lol-results/";

    int gpu_id = 1;  // NOTE(review): unused
    int clsNum = 1;  // number of object classes the model detects

    // read images in given directory
    std::ifstream img_list;
    img_list.open(image_list_file.data());
    if (!img_list)
    {
        std::cerr << image_list_file << " open error." << std::endl;
        exit(1);
    }
    std::string img_name;
    int count = 0;

    // initialize tensorrt model
    trtYolov3Init(clsNum, deployFile, modelFile);

    // do detection for each image
    while (getline(img_list, img_name)) {
        count++;
        //std::string imgFilename = test_image_path + img_name;
        std::cout << "YOLO on image < " << img_name << " >" << std::endl;
        // c_str() results are converted back to temporary std::strings to
        // match the const std::string& parameters of the typedef
        trtYolov3Detect(img_name.c_str(), test_image_path.c_str(), output_folder.c_str());
    }
    trtYolov3Free();
    dlclose(handle);
}

common.cpp

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#include "common.h"
2+
// Searches each directory in `directories` — and up to 10 "../"-prefixed
// ancestor paths of it — for `input`, returning the first path that opens.
// Asserts when nothing is found (with NDEBUG the assert compiles out and an
// empty string is returned instead).
std::string locateFile(const std::string& input, const std::vector<std::string> & directories)
{
    const int kMaxDepth = 10;
    std::string candidate;
    bool located = false;

    for (const auto& dir : directories)
    {
        candidate = dir + input;
        for (int depth = 0; depth < kMaxDepth; ++depth)
        {
            std::ifstream probe(candidate);
            if (probe.is_open())
            {
                located = true;
                break;
            }
            candidate = "../" + candidate;  // climb one directory and retry
        }
        if (located) break;
        candidate.clear();  // wipe the failed candidate before the next dir
    }

    assert(!candidate.empty() && "Could not find a file due to it not existing in the data directory.");
    return candidate;
}
24+
25+
// Reads the pixel payload of a binary PGM ("P5") file into `buffer`.
// Consumes the four whitespace-separated header tokens (magic, two
// dimensions, max value), skips the single whitespace byte that follows,
// then reads inH*inW raw bytes. `buffer` must hold at least inH*inW bytes.
void readPGMFile(const std::string& fileName, uint8_t *buffer, int inH, int inW)
{
    std::ifstream infile(fileName, std::ifstream::binary);
    assert(infile.is_open() && "Attempting to read from a file that is not open.");

    std::string magic, dim1, dim2, maxVal;
    infile >> magic >> dim1 >> dim2 >> maxVal;
    infile.seekg(1, infile.cur);  // step over the whitespace after the header
    infile.read(reinterpret_cast<char*>(buffer), inH*inW);
}

common.h

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#ifndef TRT_COMMON_H_
#define TRT_COMMON_H_
// NOTE: the previous guard _TRT_COMMON_H_ (underscore + capital) was a
// reserved identifier in C++.
#include "NvInfer.h"
#include <string>
#include <vector>
#include <fstream>
#include <cassert>
#include <iostream>
#include <cstdlib>   // abort()
#include <cstdint>   // uint8_t

// Aborts on any non-zero CUDA status code. Wrapped in do/while(0) so the
// macro behaves as a single statement (safe in braceless if/else), and the
// argument is parenthesized so expressions expand correctly.
#define CHECK(status)                                     \
    do                                                    \
    {                                                     \
        if ((status) != 0)                                \
        {                                                 \
            std::cout << "Cuda failure: " << (status);    \
            abort();                                      \
        }                                                 \
    } while (0)


// Logger for GIE info/warning/errors: forwards everything above INFO to
// stderr with a severity prefix.
class Logger : public nvinfer1::ILogger
{
public:
    void log(nvinfer1::ILogger::Severity severity, const char* msg) override
    {
        // suppress info-level messages
        if (severity == Severity::kINFO) return;

        switch (severity)
        {
        case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
        case Severity::kERROR: std::cerr << "ERROR: "; break;
        case Severity::kWARNING: std::cerr << "WARNING: "; break;
        case Severity::kINFO: std::cerr << "INFO: "; break;  // unreachable: filtered above
        default: std::cerr << "UNKNOWN: "; break;
        }
        std::cerr << msg << std::endl;
    }
};

// Searches `directories` (and up to 10 "../" ancestors of each) for `input`;
// asserts when the file cannot be found.
std::string locateFile(const std::string& input, const std::vector<std::string> & directories);
// Reads the pixel payload of a binary PGM file into `buffer` (inH*inW bytes).
void readPGMFile(const std::string& fileName, uint8_t *buffer, int inH, int inW);
#endif // TRT_COMMON_H_

0 commit comments

Comments
 (0)