Change build_dataset to return only inputs for the "valid" step

dongjun-Lee · dongjun-Lee · commit 2721381ab537 · 2018-09-12T16:47:25.000+09:00
diff --git a/test.py b/test.py
@@ -10,7 +10,7 @@
 print("Loading dictionary...")
 word_dict, reversed_dict, article_max_len, summary_max_len = build_dict("valid", args.toy)
 print("Loading validation dataset...")
-valid_x, valid_y = build_dataset("valid", word_dict, article_max_len, summary_max_len, args.toy)
+valid_x = build_dataset("valid", word_dict, article_max_len, summary_max_len, args.toy)
 valid_x_len = list(map(lambda x: len([y for y in x if y != 0]), valid_x))
 
 with tf.Session() as sess:
@@ -20,10 +20,10 @@
     ckpt = tf.train.get_checkpoint_state("./saved_model/")
     saver.restore(sess, ckpt.model_checkpoint_path)
 
-    batches = batch_iter(valid_x, valid_y, args.batch_size, 1)
+    batches = batch_iter(valid_x, [0] * len(valid_x), args.batch_size, 1)
 
     print("Writing summaries to 'result.txt'...")
-    for batch_x, batch_y in batches:
+    for batch_x, _ in batches:
         batch_x_len = list(map(lambda x: len([y for y in x if y != 0]), batch_x))
 
         valid_feed_dict = {
diff --git a/utils.py b/utils.py
@@ -67,7 +67,6 @@ def build_dataset(step, word_dict, article_max_len, summary_max_len, toy=False):
         title_list = get_text_list(train_title_path, toy)
     elif step == "valid":
         article_list = get_text_list(valid_article_path, toy)
-        title_list = get_text_list(valid_title_path, toy)
     else:
         raise NotImplementedError
 
@@ -76,11 +75,13 @@ def build_dataset(step, word_dict, article_max_len, summary_max_len, toy=False):
     x = list(map(lambda d: d[:article_max_len], x))
     x = list(map(lambda d: d + (article_max_len - len(d)) * [word_dict["<padding>"]], x))
 
-    y = list(map(lambda d: word_tokenize(d), title_list))
-    y = list(map(lambda d: list(map(lambda w: word_dict.get(w, word_dict["<unk>"]), d)), y))
-    y = list(map(lambda d: d[:(summary_max_len-1)], y))
-
-    return x, y
+    if step == "valid":
+        return x
+    else:
+        y = list(map(lambda d: word_tokenize(d), title_list))
+        y = list(map(lambda d: list(map(lambda w: word_dict.get(w, word_dict["<unk>"]), d)), y))
+        y = list(map(lambda d: d[:(summary_max_len-1)], y))
+        return x, y
 
 
 def batch_iter(inputs, outputs, batch_size, num_epochs):