5  |  5 | import numpy as np
6  |  6 | import tensorflow as tf
7  |  7 | import pandas as pd
   |  8 | +import os
8  |  9 |
9  | 10 | from caicloud.clever.tensorflow import dist_base
10 | 11 | from caicloud.clever.tensorflow import model_exporter
11 | 12 |
12 |    | -tf.app.flags.DEFINE_string("export_dir",
13 |    | -                           "/tmp/saved_model/movie",
14 |    | -                           "model export directory path.")
15 |    | -
16 |    | -tf.app.flags.DEFINE_string("batch_size", 128, "training batch size.")
17 |    | -tf.app.flags.DEFINE_string("embedding_dim", 50, "embedding dimension.")
   | 13 | +tf.app.flags.DEFINE_string("export_dir", "/tmp/saved_model/movie", "model export directory path.")
   | 14 | +tf.app.flags.DEFINE_string("data_dir", "/caicloud/admin/hengfengPOC/data", "path where data is located.")
18 | 15 |
   | 16 | +tf.app.flags.DEFINE_integer("batch_size", 128, "training batch size.")
   | 17 | +tf.app.flags.DEFINE_integer("embedding_dim", 50, "embedding dimension.")
   | 18 | +tf.app.flags.DEFINE_float("learning_rate", 0.01, "learning rate.")
19 | 19 | FLAGS = tf.app.flags.FLAGS
   | 20 | +
20 | 21 | USER_NUM = 6040
21 | 22 | ITEM_NUM = 3952
22 | 23 |
23 | 24 | def get_data():
24 | 25 |     col_names = ["user", "item", "rate", "st"]
25 |    | -    df = pd.read_csv("/tmp/movielens/ml-1m/ratings.dat", sep="::", header=None, names=col_names, engine='python')
   | 26 | +    datafile = os.path.join(FLAGS.data_dir, "ml-1m/ratings.dat")
   | 27 | +    df = pd.read_csv(datafile, sep="::", header=None, names=col_names, engine='python')
26 | 28 |
27 | 29 |     df["user"] -= 1
28 | 30 |     df["item"] -= 1
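The new `data_dir` flag replaces the hardcoded `/tmp/movielens` prefix. For reference, each line of `ml-1m/ratings.dat` has the form `UserID::MovieID::Rating::Timestamp` (e.g. `1::1193::5::978300760`), and `engine='python'` is required because pandas' default C parser does not support the multi-character separator `::`. A minimal standalone sketch of the new loading path, with the flag value inlined for illustration:

import os
import pandas as pd

data_dir = "/caicloud/admin/hengfengPOC/data"  # stands in for FLAGS.data_dir
datafile = os.path.join(data_dir, "ml-1m/ratings.dat")
# "::"-separated file with no header row; engine='python' handles the
# multi-character separator that the C parser rejects.
df = pd.read_csv(datafile, sep="::", header=None,
                 names=["user", "item", "rate", "st"], engine='python')
print(df.head())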
|
@@ -96,9 +98,19 @@ def model_fn(sync, num_replicas):
|
96  |  98 |     _global_step = tf.contrib.framework.get_or_create_global_step()
97  |  99 |
98  | 100 |     _cost = tf.square(_infer - _rate_batch)
99  |     | -    optimizer = tf.train.AdamOptimizer(0.001)
100 |     | -    _train_op = optimizer.minimize(_cost, global_step=_global_step)
101 |     | -
    | 101 | +    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    | 102 | +
    | 103 | +    if sync:
    | 104 | +        optimizer = tf.train.SyncReplicasOptimizer(
    | 105 | +            optimizer,
    | 106 | +            replicas_to_aggregate=num_replicas,
    | 107 | +            total_num_replicas=num_replicas,
    | 108 | +            name="mnist_sync_replicas")
    | 109 | +
    | 110 | +    gradients, variables = zip(*optimizer.compute_gradients(_cost))
    | 111 | +    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
    | 112 | +    _train_op = optimizer.apply_gradients(zip(gradients, variables), global_step=_global_step)
    | 113 | +
102 | 114 |     _rmse = tf.sqrt(tf.reduce_mean(_cost))
103 | 115 |
104 | 116 |     def rmse_evalute_fn(session):
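This hunk swaps Adam for plain SGD behind the new `learning_rate` flag, optionally wraps it in `tf.train.SyncReplicasOptimizer` for synchronous distributed training, and replaces `minimize()` with the explicit compute/clip/apply sequence so gradients can be clipped by global norm. A minimal single-process sketch of that clip-then-apply pattern (TF 1.x; the toy loss and variables here are placeholders for illustration):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 1])
y = tf.placeholder(tf.float32, shape=[None, 1])
w = tf.Variable(tf.zeros([1, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))
global_step = tf.contrib.framework.get_or_create_global_step()

optimizer = tf.train.GradientDescentOptimizer(0.01)
# compute_gradients yields (gradient, variable) pairs; unzip them, clip the
# gradients to a global norm of 5.0, then re-pair them for apply_gradients.
gradients, variables = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
train_op = optimizer.apply_gradients(zip(gradients, variables),
                                     global_step=global_step)

Note that `minimize(loss)` is exactly `apply_gradients(compute_gradients(loss))` without the clipping step. When `sync` is set, `SyncReplicasOptimizer` additionally needs its session hook (`optimizer.make_session_run_hook(is_chief)`) wired into the training session, which the `dist_base` harness presumably handles; the `name="mnist_sync_replicas"` string appears inherited from the MNIST example.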
|
@@ -129,8 +141,11 @@ def train_fn(session, num_global_step):
|
129 | 141 |         users, items, rates = next(_iter_train)
130 | 142 |         session.run(_train_op, feed_dict={_user_batch: users, _item_batch: items, _rate_batch: rates})
131 | 143 |
132 |     | -        if _local_step % 2000 == 0:
133 |     | -            rmse, infer, cost = session.run([_rmse, _infer, _cost], feed_dict={_user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
    | 144 | +        if _local_step % 200 == 0:
    | 145 | +            rmse, infer, cost = session.run(
    | 146 | +                [_rmse, _infer, _cost],
    | 147 | +                feed_dict={_user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
    | 148 | +
134 | 149 |             print("Eval RMSE at round {} is: {}".format(num_global_step, rmse))
135 | 150 |
136 | 151 |         _local_step += 1
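Evaluation now runs every 200 local steps instead of every 2000, each time over the full held-out set. The printed value is the standard root-mean-square error; an equivalent NumPy check (a sketch, with `infer` and `rates` assumed to be 1-D arrays of predicted and true ratings):

import numpy as np

def rmse(infer, rates):
    # Root-mean-square error between predicted and true ratings.
    return np.sqrt(np.mean(np.square(infer - rates)))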
|
|