update
This commit is contained in:
parent
d881302e4e
commit
9e58d37a8c
@@ -1,8 +1,10 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# Lista do przechowywania wyników
|
current_word = None
|
||||||
results = []
|
current_count = 0
|
||||||
|
word = None
|
||||||
|
|
||||||
# input comes from STDIN
|
# input comes from STDIN
|
||||||
for line in sys.stdin:
|
for line in sys.stdin:
|
||||||
@@ -20,12 +22,18 @@ for line in sys.stdin:
|
|||||||
# ignore/discard this line
|
# ignore/discard this line
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Dodaj słowo i jego długość do listy wyników
|
# this IF-switch only works because Hadoop sorts map output
|
||||||
results.append((word, len(word), count))
|
# by key (here: word) before it is passed to the reducer
|
||||||
|
if current_word == word:
|
||||||
|
current_count += count
|
||||||
|
else:
|
||||||
|
if current_word:
|
||||||
|
# write result to STDOUT
|
||||||
|
print('%s\t%s' % (current_word, current_count))
|
||||||
|
current_count = count
|
||||||
|
current_word = word
|
||||||
|
|
||||||
# Posortuj wyniki po długości słowa
|
# do not forget to output the last word if needed!
|
||||||
results.sort(key=lambda x: x[1])
|
if current_word == word:
|
||||||
|
print('%s\t%s' % (current_word, current_count))
|
||||||
|
|
||||||
# Wypisz posortowane wyniki
|
|
||||||
for result in results:
|
|
||||||
print('%s\t%s' % (result[0], result[2]))
|
|
||||||
|
Loading…
Reference in New Issue
Block a user