
Commit 3ba2054

Switched multi-GPU to NCCL
Parent: 2317fa1


48 files changed: +813, -873 lines

CMakeLists.txt (+1)

@@ -28,6 +28,7 @@ include(cmake/ConfigGen.cmake)
 # ---[ Options
 caffe_option(CPU_ONLY "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA
 caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY)
+caffe_option(USE_NCCL "Build Caffe with NCCL library support" OFF)
 caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON)
 caffe_option(BUILD_python "Build Python wrapper" ON)
 set(python_version "2" CACHE STRING "Specify which Python version to use")

Makefile (+6)

@@ -328,6 +328,12 @@ ifeq ($(USE_CUDNN), 1)
 	COMMON_FLAGS += -DUSE_CUDNN
 endif

+# NCCL acceleration configuration
+ifeq ($(USE_NCCL), 1)
+	LIBRARIES += nccl
+	COMMON_FLAGS += -DUSE_NCCL
+endif
+
 # configure IO libraries
 ifeq ($(USE_OPENCV), 1)
 	COMMON_FLAGS += -DUSE_OPENCV

Makefile.config.example (+4)

@@ -94,6 +94,10 @@ LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
 # INCLUDE_DIRS += $(shell brew --prefix)/include
 # LIBRARY_DIRS += $(shell brew --prefix)/lib

+# NCCL acceleration switch (uncomment to build with NCCL)
+# https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0)
+# USE_NCCL := 1
+
 # Uncomment to use `pkg-config` to specify OpenCV library paths.
 # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
 # USE_PKG_CONFIG := 1

cmake/Dependencies.cmake (+11, -4)

@@ -67,6 +67,13 @@ if(NOT HAVE_CUDA)
   add_definitions(-DCPU_ONLY)
 endif()

+if(USE_NCCL)
+  find_package(NCCL REQUIRED)
+  include_directories(SYSTEM ${NCCL_INCLUDE_DIR})
+  list(APPEND Caffe_LINKER_LIBS ${NCCL_LIBRARIES})
+  add_definitions(-DUSE_NCCL)
+endif()
+
 # ---[ OpenCV
 if(USE_OPENCV)
   find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs)

@@ -119,18 +126,18 @@ if(BUILD_python)
     find_package(NumPy 1.7.1)
     # Find the matching boost python implementation
     set(version ${PYTHONLIBS_VERSION_STRING})
-
+
     STRING( REGEX REPLACE "[^0-9]" "" boost_py_version ${version} )
     find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}")
     set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND})
-
+
     while(NOT "${version}" STREQUAL "" AND NOT Boost_PYTHON_FOUND)
       STRING( REGEX REPLACE "([0-9.]+).[0-9]+" "\\1" version ${version} )
-
+
       STRING( REGEX REPLACE "[^0-9]" "" boost_py_version ${version} )
       find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}")
       set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND})
-
+
       STRING( REGEX MATCHALL "([0-9.]+).[0-9]+" has_more_version ${version} )
       if("${has_more_version}" STREQUAL "")
         break()

cmake/Modules/FindNCCL.cmake (new file, +26)

@@ -0,0 +1,26 @@
+set(NCCL_INC_PATHS
+    /usr/include
+    /usr/local/include
+    $ENV{NCCL_DIR}/include
+    )
+
+set(NCCL_LIB_PATHS
+    /lib
+    /lib64
+    /usr/lib
+    /usr/lib64
+    /usr/local/lib
+    /usr/local/lib64
+    $ENV{NCCL_DIR}/lib
+    )
+
+find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_INC_PATHS})
+find_library(NCCL_LIBRARIES NAMES nccl PATHS ${NCCL_LIB_PATHS})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARIES)
+
+if (NCCL_FOUND)
+  message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIR}, library: ${NCCL_LIBRARIES})")
+  mark_as_advanced(NCCL_INCLUDE_DIR NCCL_LIBRARIES)
+endif ()
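
For context, here is a minimal stand-alone sketch of the single-process NCCL pattern this commit builds on: one communicator per GPU and one ncclAllReduce that sums a buffer across all devices. This is not code from the commit; it assumes NCCL v1 (as linked above, where the collective count is an int and no group calls are needed) and the CUDA runtime.

#include <cstdio>
#include <vector>
#include <cuda_runtime.h>
#include <nccl.h>

int main() {
  int ndev = 0;
  cudaGetDeviceCount(&ndev);
  if (ndev < 2) { std::printf("need at least 2 GPUs\n"); return 0; }

  std::vector<int> devs(ndev);
  for (int i = 0; i < ndev; ++i) devs[i] = i;
  std::vector<ncclComm_t> comms(ndev);
  ncclCommInitAll(comms.data(), ndev, devs.data());  // one comm per device

  const int count = 1 << 20;
  std::vector<float*> buf(ndev);
  std::vector<cudaStream_t> streams(ndev);
  for (int i = 0; i < ndev; ++i) {
    cudaSetDevice(devs[i]);
    cudaMalloc(&buf[i], count * sizeof(float));
    cudaMemset(buf[i], 0, count * sizeof(float));
    cudaStreamCreate(&streams[i]);
  }

  // In-place allreduce: every device ends up with the elementwise sum.
  for (int i = 0; i < ndev; ++i) {
    cudaSetDevice(devs[i]);
    ncclAllReduce(buf[i], buf[i], count, ncclFloat, ncclSum,
                  comms[i], streams[i]);
  }
  for (int i = 0; i < ndev; ++i) {
    cudaSetDevice(devs[i]);
    cudaStreamSynchronize(streams[i]);
  }

  for (int i = 0; i < ndev; ++i) {
    cudaSetDevice(devs[i]);
    cudaFree(buf[i]);
    cudaStreamDestroy(streams[i]);
    ncclCommDestroy(comms[i]);
  }
  return 0;
}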

cmake/Summary.cmake (+1)

@@ -117,6 +117,7 @@ function(caffe_print_configuration_summary)
   caffe_status("  USE_OPENCV        : ${USE_OPENCV}")
   caffe_status("  USE_LEVELDB       : ${USE_LEVELDB}")
   caffe_status("  USE_LMDB          : ${USE_LMDB}")
+  caffe_status("  USE_NCCL          : ${USE_NCCL}")
   caffe_status("  ALLOW_LMDB_NOLOCK : ${ALLOW_LMDB_NOLOCK}")
   caffe_status("")
   caffe_status("Dependencies:")

include/caffe/blob.hpp (+1)

@@ -220,6 +220,7 @@ class Blob {
   void set_cpu_data(Dtype* data);
   const int* gpu_shape() const;
   const Dtype* gpu_data() const;
+  void set_gpu_data(Dtype* data);
   const Dtype* cpu_diff() const;
   const Dtype* gpu_diff() const;
   Dtype* mutable_cpu_data();
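
The new set_gpu_data lets a Blob point at device memory it does not own, which is what allows a multi-GPU setup to keep all learnable parameters in one contiguous per-GPU buffer so a single collective covers every gradient. A plain-C++ stand-in for that layout idea (hypothetical FakeBlob type, host memory substituted for GPU memory, not the commit's code):

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for a parameter blob that can alias external
// storage, analogous to the new Blob::set_gpu_data(Dtype*).
struct FakeBlob {
  float* data = nullptr;
  size_t count = 0;
  explicit FakeBlob(size_t n) : count(n) {}
  void set_data(float* ptr) { data = ptr; }
};

// Lay every blob out back-to-back in one master buffer so one
// collective over `master` covers all parameters at once.
void MapIntoMaster(std::vector<FakeBlob>& blobs, std::vector<float>& master) {
  size_t total = 0;
  for (const FakeBlob& b : blobs) total += b.count;
  master.assign(total, 0.f);
  size_t offset = 0;
  for (FakeBlob& b : blobs) {
    b.set_data(master.data() + offset);
    offset += b.count;
  }
}

int main() {
  std::vector<FakeBlob> blobs{FakeBlob(3), FakeBlob(5), FakeBlob(2)};
  std::vector<float> master;
  MapIntoMaster(blobs, master);
  std::cout << "master holds " << master.size() << " floats\n";  // 10
}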

include/caffe/common.hpp (+10, -4)

@@ -158,11 +158,14 @@ class Caffe {
   // Search from start_id to the highest possible device ordinal,
   // return the ordinal of the first available device.
   static int FindDevice(const int start_id = 0);
-  // Parallel training info
+  // Parallel training
   inline static int solver_count() { return Get().solver_count_; }
   inline static void set_solver_count(int val) { Get().solver_count_ = val; }
-  inline static bool root_solver() { return Get().root_solver_; }
-  inline static void set_root_solver(bool val) { Get().root_solver_ = val; }
+  inline static int solver_rank() { return Get().solver_rank_; }
+  inline static void set_solver_rank(int val) { Get().solver_rank_ = val; }
+  inline static bool multiprocess() { return Get().multiprocess_; }
+  inline static void set_multiprocess(bool val) { Get().multiprocess_ = val; }
+  inline static bool root_solver() { return Get().solver_rank_ == 0; }

 protected:
 #ifndef CPU_ONLY

@@ -172,8 +175,11 @@ class Caffe {
   shared_ptr<RNG> random_generator_;

   Brew mode_;
+
+  // Parallel training
   int solver_count_;
-  bool root_solver_;
+  int solver_rank_;
+  bool multiprocess_;

 private:
   // The private constructor to avoid duplicate instantiation.
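
Note that root_solver() is now derived rather than stored: rank 0 is the root, following the NCCL/MPI convention, and set_root_solver disappears. A standalone model of the new state (an illustrative struct, not the Caffe singleton itself):

#include <iostream>

// Standalone model of Caffe's new parallel-training state: the root
// flag is computed from the rank instead of being settable on its own.
struct ParallelState {
  int solver_count = 1;       // total solvers in the training group
  int solver_rank = 0;        // this solver's rank within the group
  bool multiprocess = false;  // true when solvers run in separate processes
  bool root_solver() const { return solver_rank == 0; }  // derived, not stored
};

int main() {
  ParallelState worker;
  worker.solver_count = 4;
  worker.solver_rank = 1;
  std::cout << std::boolalpha << worker.root_solver() << '\n';  // false: rank 1 is a worker
}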

include/caffe/data_reader.hpp (-82)

This file was deleted.

include/caffe/internal_thread.hpp (+2, -2)

@@ -42,8 +42,8 @@ class InternalThread {
   bool must_stop();

 private:
-  void entry(int device, Caffe::Brew mode, int rand_seed, int solver_count,
-      bool root_solver);
+  void entry(int device, Caffe::Brew mode, int rand_seed,
+      int solver_count, int solver_rank, bool multiprocess);

   shared_ptr<boost::thread> thread_;
 };

include/caffe/layer.hpp (+1, -42)

@@ -38,7 +38,7 @@ class Layer {
    * layer.
    */
   explicit Layer(const LayerParameter& param)
-    : layer_param_(param), is_shared_(false) {
+    : layer_param_(param) {
       // Set phase and copy blobs (if there are any).
       phase_ = param.phase();
       if (layer_param_.blobs_size() > 0) {

@@ -66,7 +66,6 @@ class Layer {
    */
   void SetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
-    InitMutex();
     CheckBlobCounts(bottom, top);
     LayerSetUp(bottom, top);
     Reshape(bottom, top);

@@ -92,30 +91,6 @@ class Layer {
   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {}

-  /**
-   * @brief Whether a layer should be shared by multiple nets during data
-   *        parallelism. By default, all layers except for data layers should
-   *        not be shared. data layers should be shared to ensure each worker
-   *        solver access data sequentially during data parallelism.
-   */
-  virtual inline bool ShareInParallel() const { return false; }
-
-  /** @brief Return whether this layer is actually shared by other nets.
-   *         If ShareInParallel() is true and using more than one GPU and the
-   *         net has TRAIN phase, then this function is expected return true.
-   */
-  inline bool IsShared() const { return is_shared_; }
-
-  /** @brief Set whether this layer is actually shared by other nets
-   *         If ShareInParallel() is true and using more than one GPU and the
-   *         net has TRAIN phase, then is_shared should be set true.
-   */
-  inline void SetShared(bool is_shared) {
-    CHECK(ShareInParallel() || !is_shared)
-        << type() << "Layer does not support sharing.";
-    is_shared_ = is_shared;
-  }
-
   /**
    * @brief Adjust the shapes of top blobs and internal buffers to accommodate
    *        the shapes of the bottom blobs.

@@ -428,19 +403,6 @@ class Layer {
   }

 private:
-  /** Whether this layer is actually shared by other nets*/
-  bool is_shared_;
-
-  /** The mutex for sequential forward if this layer is shared */
-  shared_ptr<boost::mutex> forward_mutex_;
-
-  /** Initialize forward_mutex_ */
-  void InitMutex();
-  /** Lock forward_mutex_ if this layer is shared */
-  void Lock();
-  /** Unlock forward_mutex_ if this layer is shared */
-  void Unlock();
-
   DISABLE_COPY_AND_ASSIGN(Layer);
 };  // class Layer

@@ -450,8 +412,6 @@ class Layer {
 template <typename Dtype>
 inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-  // Lock during forward to ensure sequential forward
-  Lock();
   Dtype loss = 0;
   Reshape(bottom, top);
   switch (Caffe::mode()) {

@@ -482,7 +442,6 @@ inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
   default:
     LOG(FATAL) << "Unknown caffe mode.";
   }
-  Unlock();
   return loss;
 }

include/caffe/layers/base_data_layer.hpp (+3, -2)

@@ -68,15 +68,16 @@ class BasePrefetchingDataLayer :
       const vector<Blob<Dtype>*>& top);

   // Prefetches batches (asynchronously if to GPU memory)
-  static const int PREFETCH_COUNT = 3;
+  static const int PREFETCH_COUNT = 4;  // same as proto

 protected:
   virtual void InternalThreadEntry();
   virtual void load_batch(Batch<Dtype>* batch) = 0;

-  Batch<Dtype> prefetch_[PREFETCH_COUNT];
+  vector<shared_ptr<Batch<Dtype> > > prefetch_;
   BlockingQueue<Batch<Dtype>*> prefetch_free_;
   BlockingQueue<Batch<Dtype>*> prefetch_full_;
+  Batch<Dtype>* prefetch_current_;

   Blob<Dtype> transformed_data_;
 };
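
The prefetch batches move from a fixed in-class array to heap-allocated Batches cycled through the two blocking queues, with prefetch_current_ tracking the batch currently handed to the forward pass. A generic sketch of that free/full queue pattern (simplified; not Caffe's BlockingQueue):

#include <condition_variable>
#include <mutex>
#include <queue>

// Minimal blocking queue: pop() waits until an element is available.
template <typename T>
class SimpleBlockingQueue {
 public:
  void push(T t) {
    {
      std::lock_guard<std::mutex> guard(mutex_);
      queue_.push(std::move(t));
    }
    cond_.notify_one();
  }
  T pop() {
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T t = std::move(queue_.front());
    queue_.pop();
    return t;
  }
 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

// Usage pattern: the prefetch thread runs
//   Batch* b = free.pop(); load(b); full.push(b);
// while the consumer runs
//   if (current) free.push(current);  // recycle the last batch
//   current = full.pop();             // block until data is ready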

include/caffe/layers/data_layer.hpp (+5, -2)

@@ -4,7 +4,6 @@
 #include <vector>

 #include "caffe/blob.hpp"
-#include "caffe/data_reader.hpp"
 #include "caffe/data_transformer.hpp"
 #include "caffe/internal_thread.hpp"
 #include "caffe/layer.hpp"

@@ -29,9 +28,13 @@ class DataLayer : public BasePrefetchingDataLayer<Dtype> {
   virtual inline int MaxTopBlobs() const { return 2; }

 protected:
+  void Next();
+  bool Skip();
   virtual void load_batch(Batch<Dtype>* batch);

-  DataReader reader_;
+  shared_ptr<db::DB> db_;
+  shared_ptr<db::Cursor> cursor_;
+  uint64_t offset_;
 };

 }  // namespace caffe
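
With DataReader gone, each solver owns its own db::Cursor and the offset_ counter drives Skip(): records are assigned round-robin by solver rank, so workers partition the database instead of draining a shared queue (the HDF5 layer below gains the same Skip/Next pair). A standalone sketch of the assignment rule (illustrative, not the commit's exact code):

#include <cstdint>
#include <iostream>

// Record at `offset` belongs to solver `rank` iff offset % size == rank,
// so `size` solvers partition the stream round-robin with no coordination.
bool skip_record(uint64_t offset, int rank, int size) {
  return offset % static_cast<uint64_t>(size) != static_cast<uint64_t>(rank);
}

int main() {
  const int solver_count = 2;
  for (int rank = 0; rank < solver_count; ++rank) {
    std::cout << "rank " << rank << " reads offsets:";
    for (uint64_t offset = 0; offset < 8; ++offset) {
      if (!skip_record(offset, rank, solver_count)) std::cout << ' ' << offset;
    }
    std::cout << '\n';
  }
  return 0;
}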

include/caffe/layers/hdf5_data_layer.hpp (+5, -1)

@@ -23,7 +23,7 @@ template <typename Dtype>
 class HDF5DataLayer : public Layer<Dtype> {
  public:
   explicit HDF5DataLayer(const LayerParameter& param)
-      : Layer<Dtype>(param) {}
+      : Layer<Dtype>(param), offset_() {}
   virtual ~HDF5DataLayer();
   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);

@@ -38,6 +38,9 @@ class HDF5DataLayer : public Layer<Dtype> {
   virtual inline int MinTopBlobs() const { return 1; }

  protected:
+  void Next();
+  bool Skip();
+
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,

@@ -55,6 +58,7 @@ class HDF5DataLayer : public Layer<Dtype> {
   std::vector<shared_ptr<Blob<Dtype> > > hdf_blobs_;
   std::vector<unsigned int> data_permutation_;
   std::vector<unsigned int> file_permutation_;
+  uint64_t offset_;
 };

 }  // namespace caffe
