Skip to content

Commit a9ab223

Browse files
committed
Merge branch 'compat_sparse_table_file' into 'master'
Compat sparse table file. See merge request deep-learning/tensornet!7
2 parents 7e432a1 + 9865c9c commit a9ab223

23 files changed

+712
-21
lines changed

.gitlab-ci.yml

+2-3
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,13 @@ tn_build:
1919
script:
2020
- sed -i "s|https://github.com|${NEXUS3_HEADER}/github.com|g" WORKSPACE
2121
- ./manager build
22-
- ./manager deploy
22+
- ./manager copy-libs
23+
- ./manager test
2324
cache:
2425
- key: cache-$CI_COMMIT_REF_NAME
2526
paths:
2627
- /root/.cache/bazel/_bazel_root/cache
2728
- /root/.cache/bazel/_bazel_root/install
2829
- /root/micromamba/pkgs
2930
when: manual
30-
#only:
31-
#- tags
3231

core/BUILD

-2
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,6 @@ cc_binary(
148148
linkopts = [
149149
"-Wl,-rpath,$$ORIGIN/../libs",
150150
"-Wl,-rpath,$$ORIGIN/../../../../../lib",
151-
"-Wl,-rpath,$$ORIGIN/../../tensorflow",
152-
"-Wl,-rpath,$$ORIGIN/../../tensorflow/python"
153151
],
154152
linkshared = 1,
155153
)

core/ps/optimizer/ada_grad_kernel.cc

+13-1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ SparseAdaGradValue::SparseAdaGradValue(int dim, const AdaGrad* opt) {
9595
}
9696

9797
g2sum_ = opt->initial_g2sum;
98+
old_compat_ = false;
99+
no_show_days_ = 0;
98100
}
99101

100102
void SparseAdaGradValue::Apply(const AdaGrad* opt, SparseGradInfo& grad_info, int dim) {
@@ -117,33 +119,43 @@ void SparseAdaGradValue::Apply(const AdaGrad* opt, SparseGradInfo& grad_info, in
117119
}
118120

119121
void SparseAdaGradValue::SerializeTxt_(std::ostream& os, int dim) {
122+
os << dim << "\t";
120123
for (int i = 0; i < dim; i++) {
121124
os << Weight()[i] << "\t";
122125
}
123126

124127
os << g2sum_ << "\t";
125-
os << show_;
128+
os << show_ << "\t";
129+
os << no_show_days_;
126130
}
127131

128132
void SparseAdaGradValue::DeSerializeTxt_(std::istream& is, int dim) {
133+
is >> dim_;
129134
for (int i = 0; i < dim; i++) {
130135
is >> Weight()[i];
131136
}
132137

133138
is >> g2sum_;
134139
is >> show_;
140+
if(!old_compat_) {
141+
is >> no_show_days_;
142+
}
135143
}
136144

137145
void SparseAdaGradValue::SerializeBin_(std::ostream& os, int dim) {
138146
os.write(reinterpret_cast<const char*>(Weight()), dim * sizeof(float));
139147
os.write(reinterpret_cast<const char*>(&g2sum_), sizeof(g2sum_));
140148
os.write(reinterpret_cast<const char*>(&show_), sizeof(show_));
149+
os.write(reinterpret_cast<const char*>(&no_show_days_), sizeof(no_show_days_));
141150
}
142151

143152
void SparseAdaGradValue::DeSerializeBin_(std::istream& is, int dim) {
144153
is.read(reinterpret_cast<char*>(Weight()), dim * sizeof(float));
145154
is.read(reinterpret_cast<char*>(&g2sum_), sizeof(g2sum_));
146155
is.read(reinterpret_cast<char*>(&show_), sizeof(show_));
156+
if(!old_compat_) {
157+
is.read(reinterpret_cast<char*>(&no_show_days_), sizeof(no_show_days_));
158+
}
147159
}
148160

149161
void SparseAdaGradValue::ShowDecay(const AdaGrad* opt, int delta_days) {

core/ps/optimizer/ada_grad_kernel.h

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class alignas(4) SparseAdaGradValue
8585
virtual void DeSerializeBin_(std::istream& is, int dim);
8686

8787
private:
88+
int dim_;
8889
float g2sum_;
8990
float show_ = 0.0;
9091
int no_show_days_ = 0;

core/ps/optimizer/data_struct.cc

+1
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,6 @@ void SparseOptValue::DeSerialize(std::istream& is, int dim) {
4242
}
4343
}
4444

45+
4546
} // namespace tensornet
4647

core/ps/optimizer/data_struct.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ class alignas(4) SparseOptValue {
4040

4141
void DeSerialize(std::istream& is, int dim);
4242

43+
void SetOldCompat(bool old_compat) {
44+
old_compat_ = old_compat;
45+
}
46+
4347
float Show() const {
4448
return show_;
4549
}
@@ -53,7 +57,7 @@ class alignas(4) SparseOptValue {
5357
protected:
5458
float show_ = 0.0;
5559
int delta_show_ = 0;
56-
60+
bool old_compat_ = false;
5761
};
5862

5963
} // namespace tensornet {

core/ps/optimizer/optimizer.h

+29
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include <vector>
1919
#include <memory>
2020
#include <string>
21+
#include <iostream>
22+
#include <sstream>
2123

2224
namespace tensornet {
2325

@@ -36,6 +38,11 @@ class OptimizerBase {
3638

3739
virtual std::string Name() const = 0;
3840

41+
virtual std::tuple<bool, std::string> NeedOldCompat(std::istream& is, int dim) const {
42+
std::string emptyString = "";
43+
return std::make_tuple(false, emptyString);
44+
}
45+
3946
public:
4047
float learning_rate = 0.01;
4148
float show_decay_rate = 0.98;
@@ -70,6 +77,28 @@ class AdaGrad : public OptimizerBase {
7077
return "AdaGrad";
7178
}
7279

80+
std::tuple<bool, std::string> NeedOldCompat(std::istream& is, int dim) const {
81+
bool need_old_compat = false;
82+
std::string line;
83+
std::string cell;
84+
std::getline(is, line); // consume the trailing newline left on the stream
85+
std::getline(is, line);
86+
std::istringstream iss(line);
87+
int column_count = 0;
88+
89+
while (std::getline(iss, cell, '\t')) {
90+
++column_count;
91+
}
92+
93+
// columns should be: sign, dim_, dim_ * weight, g2sum, show, no_show_days
94+
// if column_count == dim + 4 (e.g. 12 when dim == 8), the no_show_days column is absent
95+
if(column_count == dim + 4){
96+
need_old_compat = true;
97+
}
98+
99+
return std::make_tuple(need_old_compat, line);
100+
}
101+
73102
public:
74103
float initial_g2sum = 0;
75104
float initial_scale = 1.0;

core/ps/optimizer/optimizer_kernel.h

+19-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include <butil/iobuf.h>
2727
#include <butil/logging.h>
2828
#include <Eigen/Dense>
29+
#include <cstring>
30+
#include <cstdio>
2931

3032
#include <boost/iostreams/stream.hpp>
3133

@@ -389,10 +391,22 @@ class SparseKernelBlock {
389391
<< " you must make sure that use same optimizer when incremental training";
390392

391393
is.ignore(std::numeric_limits<std::streamsize>::max(), ':') >> block.dim_;
394+
395+
std::tuple<bool, std::string> tuple = block.opt_->NeedOldCompat(is, block.dim_);
396+
bool need_old_compat = std::get<0>(tuple);
397+
std::string sample_line = std::get<1>(tuple);
398+
std::istringstream sample_is(sample_line);
392399

393400
uint64_t sign = 0;
401+
while (sample_is >> sign) {
402+
ValueType* value = block.alloc_.allocate(block.dim_, block.opt_);
403+
value->SetOldCompat(need_old_compat);
404+
value->DeSerialize(sample_is, block.dim_);
405+
block.values_[sign] = value;
406+
}
394407
while (is >> sign) {
395408
ValueType* value = block.alloc_.allocate(block.dim_, block.opt_);
409+
value->SetOldCompat(need_old_compat);
396410
value->DeSerialize(is, block.dim_);
397411
block.values_[sign] = value;
398412
}
@@ -495,7 +509,11 @@ class SparseOptimizerKernel : public SparseOptimizerKernelBase {
495509
for (size_t i = 0; i < SPARSE_KERNEL_BLOCK_NUM; ++i) {
496510
threads.push_back(std::thread([this, i, &mode, &filepath]() {
497511
std::string file = filepath;
498-
file.append("/block_").append(std::to_string(i)).append(".gz");
512+
if(FileUtils::CheckFileExists(filepath + "/block_" + std::to_string(i) + ".gz")){
513+
file.append("/block_").append(std::to_string(i)).append(".gz");
514+
} else {
515+
file.append("/sparse_block_").append(std::to_string(i)).append(".gz");
516+
}
499517

500518
FileReaderSource reader_source(file, FCT_ZLIB);
501519
boost::iostreams::stream<FileReaderSource> in_stream(reader_source);

core/ps/table/sparse_table.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,11 @@ void SparseTable::Load(const std::string& filepath, const std::string& mode) {
116116
if (name_.empty()) {
117117
file += std::to_string(GetHandle());
118118
} else {
119-
file += name_;
119+
if(FileUtils::CheckFileExists(file + name_)){
120+
file += name_;
121+
} else {
122+
file += std::to_string(GetHandle());
123+
}
120124
}
121125

122126
file += "/rank_" + std::to_string(self_shard_id_);

core/utility/file_io.cc

+4-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,6 @@ FileReaderSource::FileReaderSource(const std::string& file,
139139
reader_ = std::make_shared<ReaderInternal>(reader.release(), compression_type);
140140
}
141141

142-
143142
FileReaderSource::~FileReaderSource() {
144143
reader_ = nullptr;
145144
}
@@ -162,5 +161,9 @@ std::streamsize FileReaderSource::read(char_type* str, std::streamsize n) {
162161
return buffer.size();
163162
}
164163

164+
bool FileUtils::CheckFileExists(const std::string& filepath) {
165+
return tensorflow::Env::Default()-> FileExists(filepath).ok();
166+
}
167+
165168
} // namespace tensornet
166169

core/utility/file_io.h

+6
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ class FileReaderSource {
7070

7171
};
7272

73+
class FileUtils {
74+
public:
75+
static bool CheckFileExists(const std::string& filepath);
76+
77+
};
78+
7379
} // namespace tensornet
7480

7581
#endif // TENSORNET_UTILITY_SEMAPHORE_H_

manager

+12-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,14 @@ start_copy_libs(){
8282
}
8383

8484
start_test(){
85-
python -c "import tensorflow as tf;import tensornet as tn;tn.core.init()"
85+
[[ ${NEED_ACTIVATE_ENV} == true ]] && _activate_env
86+
export PYTHONPATH=${WORKSPACE_DIR}:${PYTHONPATH}
87+
MPI_LIB_PATH=$(ompi_info --parsable --path prefix 2>/dev/null | awk -F":" '{print $NF}')
88+
export LD_LIBRARY_PATH=${MPI_LIB_PATH}/lib:${LD_LIBRARY_PATH}
89+
cd examples
90+
rm -rf data model || true
91+
python gen_example_data.py
92+
python main.py
8693
}
8794

8895

@@ -139,6 +146,10 @@ case "$1" in
139146
shift 1
140147
start_create_dist "$@"
141148
;;
149+
(test)
150+
shift 1
151+
start_test "$@"
152+
;;
142153
(help)
143154
cmd=$(basename -- "$0")
144155
cat <<-END

tensornet/callbacks/callbacks.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class PsWeightCheckpoint(Callback):
2424
"""
2525

2626
def __init__(self, checkpoint_dir, checkpoint_save=None, need_save_model=False, dt=None, delta_days=0, save_mode="txt",
27-
model_path_incl_dt=False):
27+
model_path_incl_dt=False, **kwargs):
2828
"""
2929
:param checkpoint_dir: path of save model
3030
:param need_save_model: whether save model
@@ -34,6 +34,7 @@ def __init__(self, checkpoint_dir, checkpoint_save=None, need_save_model=False,
3434
self.checkpoint_dir = checkpoint_dir
3535
self.checkpoint_save = checkpoint_save if checkpoint_save else checkpoint_dir
3636
self.need_save_model = need_save_model
37+
self.need_load_model = kwargs.get('need_load_model', True)
3738
self.save_mode = save_mode
3839
self.model_path_incl_dt = model_path_incl_dt
3940
self.dt = dt
@@ -43,7 +44,8 @@ def __init__(self, checkpoint_dir, checkpoint_save=None, need_save_model=False,
4344

4445
def load_model(self):
4546
tn.core.barrier()
46-
self.model.load_weights(self.checkpoint_dir, include_dt=self.model_path_incl_dt, mode=self.save_mode)
47+
if self.need_load_model:
48+
self.model.load_weights(self.checkpoint_dir, include_dt=self.model_path_incl_dt, mode=self.save_mode)
4749
tn.core.barrier()
4850

4951
def reset_balance_dataset(self):

tensornet/feature_column/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from .category_column import *
15+
from .category_column import *
16+
from .sequence_category_column import *

0 commit comments

Comments
 (0)