Add movie recommendation example.

perhapszzy · perhapszzy · commit 75b9b3ac5c5e · 2017-05-18T00:52:58.000+08:00
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM tensorflow/tensorflow:0.12.0
+FROM tensorflow/tensorflow:1.0.0
 
 ENV LANG C.UTF-8
 RUN apt-get update && apt-get install -y bc
@@ -9,7 +9,7 @@ RUN rm -rf /notebooks/*
 
 COPY caicloud.tensorflow /caicloud.tensorflow
 COPY Deep_Learning_with_TensorFlow/datasets /notebooks/Deep_Learning_with_TensorFlow/datasets
-COPY Deep_Learning_with_TensorFlow/0.12.0 /notebooks/Deep_Learning_with_TensorFlow/0.12.0
+COPY Deep_Learning_with_TensorFlow/1.0.0 /notebooks/Deep_Learning_with_TensorFlow/1.0.0
 COPY run_tf.sh /run_tf.sh
 
 CMD ["/run_tf.sh"]
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/README.md b/caicloud.tensorflow/caicloud/clever/examples/recommandation/README.md
@@ -0,0 +1,41 @@
+# 使用TensorFlow解决推荐问题
+
+## 数据集
+这里使用[电影评级数据集](http://grouplens.org/datasets/movielens/)来模拟推荐问题。该数据集中数据格式如下：
+```
+1::1193::5::978300760
+1::661::3::978302109
+1::914::3::978301968
+1::3408::4::978300275
+1::2355::5::978824291
+```
+每一行包含了一个用户对一个电影的评分。比如第一行表示用户1对电影1193评分为5。数据中最后一列为时间戳，在本样例中我们并没有使用时间戳信息。这里我们的目标是对于给定的（用户，电影）对，预测给定用户对给定电影的评分。
+
+运行以下命令可以下载数据：
+```
+./download_data.sh
+```
+
+
+## 任务训练
+通过以下脚本可以在本地训练：
+```
+./train_model.sh
+```
+
+运行该脚本可以得到类似下面的结果：
+```
+Training begins @ 2017-05-18 00:24:33.373159
+Eval RMSE at round 0 is: 2.81291127205
+Eval RMSE at round 2000 is: 0.945966959
+Eval RMSE at round 4000 is: 0.933194696903
+Eval RMSE at round 6000 is: 0.927836835384
+Eval RMSE at round 8000 is: 0.923974812031
+Eval RMSE at round 10000 is: 0.92291110754
+Eval RMSE at round 12000 is: 0.919465661049
+Eval RMSE at round 14000 is: 0.918680250645
+Eval RMSE at round 16000 is: 0.917023718357
+Eval RMSE at round 18000 is: 0.915674805641
+Eval RMSE at round 20000 is: 0.91452050209
+Eval RMSE at round 22000 is: 0.915164649487
+```
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/download_data.sh b/caicloud.tensorflow/caicloud/clever/examples/recommandation/download_data.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+#
+# Download the MovieLens archive selected by SIZE and unpack it into DATA_DIR.
+
+# Stop at the first failing command so that a failed download is never
+# handed to unzip.
+set -e
+
+DATA_DIR=/tmp/movielens
+SIZE=1m
+ARCHIVE="${DATA_DIR}/ml-${SIZE}.zip"
+
+mkdir -p "${DATA_DIR}"
+wget "http://files.grouplens.org/datasets/movielens/ml-${SIZE}.zip" -O "${ARCHIVE}"
+unzip "${ARCHIVE}" -d "${DATA_DIR}"
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
@@ -0,0 +1,141 @@
+# coding=utf-8
+
+import time
+
+import numpy as np
+import tensorflow as tf
+import pandas as pd
+
+from caicloud.clever.tensorflow import dist_base
+from caicloud.clever.tensorflow import model_exporter
+
+tf.app.flags.DEFINE_string("export_dir",
+                           "/tmp/saved_model/movie",
+                           "model export directory path.")
+
+tf.app.flags.DEFINE_string("batch_size", 128, "training batch size.")
+tf.app.flags.DEFINE_string("embedding_dim", 50, "embedding dimension.")
+
+FLAGS = tf.app.flags.FLAGS
+USER_NUM = 6040
+ITEM_NUM = 3952
+
+def get_data():
+    col_names = ["user", "item", "rate", "st"]
+    df = pd.read_csv("/tmp/movielens/ml-1m/ratings.dat", sep="::", header=None, names=col_names, engine='python')
+    
+    df["user"] -= 1
+    df["item"] -= 1
+    for col in ("user", "item"):
+        df[col] = df[col].astype(np.int32)
+    df["rate"] = df["rate"].astype(np.float32)
+    
+    rows = len(df)
+    print "Total number of instances: ", rows
+    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
+    split_index = int(rows * 0.9)
+    return df[0:split_index], df[split_index:]
+
+class ShuffleIterator(object):
+    def __init__(self, inputs, batch_size=10):
+        self.inputs = inputs
+        self.batch_size = batch_size
+        self.num_cols = len(self.inputs)
+        self.len = len(self.inputs[0])
+        self.inputs = np.transpose(np.vstack([np.array(self.inputs[i]) for i in range(self.num_cols)]))
+
+    def __len__(self):
+        return self.len
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.next()
+
+    def next(self):
+        ids = np.random.randint(0, self.len, (self.batch_size,))
+        out = self.inputs[ids, :]
+        return [out[:, i] for i in range(self.num_cols)]
+
+_train, _test = get_data()
+_iter_train = ShuffleIterator([_train["user"], _train["item"], _train["rate"]], batch_size=FLAGS.batch_size)
+_train_op = None
+_infer = None
+_global_step = None
+_user_batch = None
+_item_batch = None
+_rate_batch = None
+_cost = None
+_rmse = None
+_local_step = 0
+
+def inference(user_batch, item_batch, dim):
+    w_user = tf.get_variable("embd_user", shape=[USER_NUM, dim],
+                             initializer=tf.truncated_normal_initializer(stddev=0.02))
+    w_item = tf.get_variable("embd_item", shape=[ITEM_NUM, dim],
+                             initializer=tf.truncated_normal_initializer(stddev=0.02))
+    
+    input1 = tf.nn.embedding_lookup(w_user, user_batch)
+    input2 = tf.nn.embedding_lookup(w_item, item_batch)
+    input = tf.concat([input1, input2], 1)
+
+    w = tf.get_variable("w", shape=[2*dim, 1], initializer=tf.truncated_normal_initializer(stddev=0.02))
+    b = tf.get_variable("b", shape=[1], initializer=tf.constant_initializer(1))
+    infer = tf.transpose(tf.matmul(input, w) + b, name="infer")
+    return infer
+
+def model_fn(sync, num_replicas):
+    global _train_op, _infer, _user_batch, _item_batch, _rate_batch, _rmse, _cost, _global_step
+    
+    _user_batch = tf.placeholder(tf.int32, shape=[None], name="user")
+    _item_batch = tf.placeholder(tf.int32, shape=[None], name="item")
+    _rate_batch = tf.placeholder(tf.float32, shape=[None], name="rate")
+
+    _infer = inference(_user_batch, _item_batch, FLAGS.embedding_dim)
+    _global_step = tf.contrib.framework.get_or_create_global_step()
+    
+    _cost = tf.square(_infer - _rate_batch)
+    optimizer = tf.train.AdamOptimizer(0.001)
+    _train_op = optimizer.minimize(_cost, global_step=_global_step)
+
+    _rmse = tf.sqrt(tf.reduce_mean(_cost))
+    
+    def rmse_evalute_fn(session):
+        return session.run(_rmse, feed_dict={
+            _user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
+
+    # 定义模型导出配置
+    model_export_spec = model_exporter.ModelExportSpec(
+        export_dir=FLAGS.export_dir,
+        input_tensors={"user": _user_batch, "item": _item_batch},
+        output_tensors={"infer": _infer})
+
+    # 定义模型评测（准确率）的计算方法
+    model_metric_ops = {
+        "rmse": rmse_evalute_fn
+    }
+    
+    return dist_base.ModelFnHandler(
+        global_step=_global_step,
+        optimizer=optimizer, 
+        model_metric_ops=model_metric_ops,
+        model_export_spec=model_export_spec,
+        summary_op=None)
+    
+def train_fn(session, num_global_step):
+    global _train_op, _infer, _user_batch, _item_batch, _rate_batch, _rmse, _local_step, _cost
+    
+    users, items, rates = next(_iter_train)            
+    session.run(_train_op, feed_dict={_user_batch: users, _item_batch: items, _rate_batch: rates})
+            
+    if _local_step % 2000 == 0:
+        rmse, infer, cost = session.run([_rmse, _infer, _cost], feed_dict={_user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
+        print("Eval RMSE at round {} is: {}".format(num_global_step, rmse))
+    
+    _local_step += 1        
+    return False
+
+if __name__ == '__main__':
+    distTfRunner = dist_base.DistTensorflowRunner(model_fn = model_fn, gen_init_fn=None)
+    distTfRunner.run(train_fn)
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/train_model.sh b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train_model.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright 2017 Caicloud authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+rm -rf /tmp/caicloud-dist-tf
+rm -rf /tmp/saved_model/movie
+
+export TF_MAX_STEPS=30000
+export TF_SAVE_CHECKPOINTS_SECS=60
+export TF_SAVE_SUMMARIES_STEPS=1000
+python train.py