@@ -20,11 +20,11 @@ def clean_str(sentence):


def get_text_list(data_path, toy):
-    with open(data_path, "r", encoding='utf-8') as f:
+    with open(data_path, "r", encoding="utf-8") as f:
        if not toy:
-            return list(map(lambda x: clean_str(x.strip()), f.readlines()))
+            return [clean_str(x.strip()) for x in f.readlines()]
        else:
-            return list(map(lambda x: clean_str(x.strip()), f.readlines()))[:50000]
+            return [clean_str(x.strip()) for x in f.readlines()][:50000]


def build_dict(step, toy=False):
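Note: each -/+ pair above is behavior-preserving; list(map(lambda x: clean_str(x.strip()), f.readlines())) and [clean_str(x.strip()) for x in f.readlines()] build the same list. A minimal sketch of the equivalence, using a hypothetical strip-and-lowercase step in place of clean_str:

    lines = [" Foo \n", " Bar \n"]
    via_map = list(map(lambda x: x.strip().lower(), lines))   # old style
    via_comprehension = [x.strip().lower() for x in lines]    # new style
    assert via_map == via_comprehension == ["foo", "bar"]
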
@@ -70,17 +70,17 @@ def build_dataset(step, word_dict, article_max_len, summary_max_len, toy=False):
    else:
        raise NotImplementedError

-    x = list(map(lambda d: word_tokenize(d), article_list))
-    x = list(map(lambda d: list(map(lambda w: word_dict.get(w, word_dict["<unk>"]), d)), x))
-    x = list(map(lambda d: d[:article_max_len], x))
-    x = list(map(lambda d: d + (article_max_len - len(d)) * [word_dict["<padding>"]], x))
-
+    x = [word_tokenize(d) for d in article_list]
+    x = [[word_dict.get(w, word_dict["<unk>"]) for w in d] for d in x]
+    x = [d[:article_max_len] for d in x]
+    x = [d + (article_max_len - len(d)) * [word_dict["<padding>"]] for d in x]
+
    if step == "valid":
        return x
-    else:
-        y = list(map(lambda d: word_tokenize(d), title_list))
-        y = list(map(lambda d: list(map(lambda w: word_dict.get(w, word_dict["<unk>"]), d)), y))
-        y = list(map(lambda d: d[:(summary_max_len - 1)], y))
+    else:
+        y = [word_tokenize(d) for d in title_list]
+        y = [[word_dict.get(w, word_dict["<unk>"]) for w in d] for d in y]
+        y = [d[:(summary_max_len - 1)] for d in y]

        return x, y
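The four rewritten x assignments form a small preprocessing pipeline: tokenize each article, map tokens to ids (falling back to <unk>), truncate to article_max_len, and right-pad with <padding> ids; the y assignments do the same for the titles, minus the padding. A minimal sketch of that pipeline, assuming a toy word_dict and a plain str.split() standing in for word_tokenize:

    word_dict = {"<padding>": 0, "<unk>": 1, "the": 2, "cat": 3}
    article_list = ["the cat sat"]
    article_max_len = 5

    x = [d.split() for d in article_list]                                        # tokenize
    x = [[word_dict.get(w, word_dict["<unk>"]) for w in d] for d in x]           # token -> id, unknowns -> <unk>
    x = [d[:article_max_len] for d in x]                                         # truncate
    x = [d + (article_max_len - len(d)) * [word_dict["<padding>"]] for d in x]   # right-pad
    assert x == [[2, 3, 1, 0, 0]]

The titles are cut to summary_max_len - 1 rather than summary_max_len, presumably to leave room for an end-of-sequence token appended later in the pipeline.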