Skip to content

Commit 7550448

Browse files
authored
Update invertedIndex.scala
原本连着三个map太慢了,可以优化一下
1 parent 0141e00 commit 7550448

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

spark-demo/invertedIndex/src/main/scala/invertedIndex.scala

+5 -4
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,11 @@ object InvertedIndex{
1818
val cfg = new HierConf(args(0))
1919
val inputfile = cfg.getString("inputfile")
2020
val result = sc.textFile(inputfile)
21-
.map(x => x.split("\t"))
22-
.map(x => (x(0), x(1)))
23-
.map(x => x._2.split(" ").map(y => (y, x._1)))
24-
.flatMap(x => x)
21+
.flatMap(x => {
22+
val arr = x.split("\t")
23+
val arr_words = arr(1).split(" ")
24+
arr_words.map(y => (y, arr(0)))
25+
})
2526
.reduceByKey( (x, y) => x + "|" + y)
2627
result.collect.foreach(println)
2728
sc.stop()

0 commit comments

Comments
 (0)