update
This commit is contained in:
parent
2a702c14d3
commit
d881302e4e
@ -1,38 +1,31 @@
|
||||
#!/usr/bin/env python
|
||||
from operator import itemgetter
|
||||
import sys
|
||||
|
||||
def reduce_word_counts(lines):
    """Sum the counts of consecutive records that share the same word.

    Each input line is expected to look like ``word<TAB>count``. Lines
    without a tab separator, or whose count is not an integer, are
    silently discarded.

    Grouping relies on equal words arriving next to each other (Hadoop
    sorts map output by key before handing it to the reducer), so only
    the running total for the current word is kept in memory.

    Args:
        lines: iterable of text lines (for example ``sys.stdin``).

    Yields:
        ``(word, total)`` tuples, one per run of identical words, in
        input order.
    """
    current_word = None
    current_count = 0

    for line in lines:
        # remove leading and trailing whitespace
        line = line.strip()

        # parse the input we got from mapper.py; a line with no tab
        # (e.g. a blank line) cannot be a valid record, so skip it
        # instead of crashing on tuple unpacking
        word, separator, count = line.partition('\t')
        if not separator:
            continue

        # convert count (currently a string) to int
        try:
            count = int(count)
        except ValueError:
            # count was not a number, so silently
            # ignore/discard this line
            continue

        if current_word == word:
            current_count += count
            continue

        # the key changed: emit the finished group and start a new one
        if current_word is not None:
            yield current_word, current_count
        current_word = word
        current_count = count

    # do not forget to output the last word; checking against None
    # (rather than against the last parsed word) avoids printing a
    # bogus "None" row on empty input and avoids losing the final group
    # when the last line was discarded
    if current_word is not None:
        yield current_word, current_count


if __name__ == '__main__':
    # input comes from STDIN, results are written to STDOUT
    for reduced_word, reduced_total in reduce_word_counts(sys.stdin):
        print('%s\t%s' % (reduced_word, reduced_total))
|
||||
from operator import itemgetter
|
||||
import sys
|
||||
|
||||
def sort_by_word_length(lines):
    """Parse ``word<TAB>count`` records and order them by word length.

    Lines without a tab separator, or whose count is not an integer,
    are silently discarded. The sort is stable, so records whose words
    have the same length keep their input order.

    Args:
        lines: iterable of text lines (for example ``sys.stdin``).

    Returns:
        A list of ``(word, count)`` tuples sorted by ascending
        ``len(word)``.
    """
    # list used to collect the parsed records
    results = []

    for line in lines:
        # remove leading and trailing whitespace
        line = line.strip()

        # parse the input we got from mapper.py; a line with no tab
        # (e.g. a blank line) cannot be a valid record, so skip it
        # instead of crashing on tuple unpacking
        word, separator, count = line.partition('\t')
        if not separator:
            continue

        # convert count (currently a string) to int
        try:
            count = int(count)
        except ValueError:
            # count was not a number, so silently
            # ignore/discard this line
            continue

        results.append((word, count))

    # sort the records by word length
    results.sort(key=lambda record: len(record[0]))
    return results


if __name__ == '__main__':
    # input comes from STDIN; print the sorted records to STDOUT
    for sorted_word, sorted_count in sort_by_word_length(sys.stdin):
        print('%s\t%s' % (sorted_word, sorted_count))
|
||||
|
Loading…
Reference in New Issue
Block a user