File tree Expand file tree Collapse file tree 1 file changed +39
-0
lines changed Expand file tree Collapse file tree 1 file changed +39
-0
lines changed Original file line number Diff line number Diff line change
1
#!/usr/bin/env python
"""Hadoop Streaming reducer: sum the per-word counts emitted by mapper.py.

Reads ``word<TAB>count`` lines from STDIN and writes one ``word<TAB>total``
line to STDOUT for every run of consecutive lines sharing the same word.

The grouping only looks at *adjacent* lines, so it relies on the input being
sorted by key (word) before it reaches the reducer.
"""

from itertools import groupby
from operator import itemgetter
import sys


def _parse_pairs(lines, separator='\t'):
    """Yield ``(word, count)`` for each well-formed line; skip the rest.

    A line is discarded when it lacks the separator (this includes blank
    lines) or when its count field is not an integer.
    """
    for line in lines:
        # Split on the first separator only, after trimming whitespace.
        word, found, raw_count = line.strip().partition(separator)
        if not found:
            # No separator at all: treat like any other malformed record
            # instead of crashing the whole reducer.
            continue
        try:
            count = int(raw_count)
        except ValueError:
            # Count was not a number, so silently discard this line.
            continue
        yield word, count


def reduce_counts(lines, separator='\t'):
    """Yield ``(word, total)`` for each run of consecutive equal words.

    ``lines`` is any iterable of text lines. Malformed lines are ignored and
    do not interrupt a run. An empty input yields nothing.
    """
    pairs = _parse_pairs(lines, separator)
    # groupby flushes every run itself, including the last one, so no
    # separate "emit the final word" step is needed after the loop.
    for word, run in groupby(pairs, key=itemgetter(0)):
        yield word, sum(count for _, count in run)


def main(stdin=None, stdout=None):
    """Reduce ``stdin`` (default ``sys.stdin``) onto ``stdout`` (default ``sys.stdout``)."""
    source = sys.stdin if stdin is None else stdin
    sink = sys.stdout if stdout is None else stdout
    for word, total in reduce_counts(source):
        # write() instead of print so the script runs on Python 2 and 3.
        sink.write('%s\t%s\n' % (word, total))


if __name__ == '__main__':
    main()
You can’t perform that action at this time.
0 commit comments