From 9e58d37a8cb2aadca5b8cf41c34aa657e2c58521 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Skurzy=C5=84ski?= <pawelskurzynski@192.168.1.3>
Date: Fri, 5 Apr 2024 15:34:31 +0200
Subject: [PATCH] reducer: sum counts per word instead of sorting by word length

---
 mr/python/reducer.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/mr/python/reducer.py b/mr/python/reducer.py
index 1fe2f28..44c36fd 100644
--- a/mr/python/reducer.py
+++ b/mr/python/reducer.py
@@ -1,8 +1,10 @@
+#!/usr/bin/env python
 from operator import itemgetter
 import sys
 
-# Lista do przechowywania wyników
-results = []
+current_word = None
+current_count = 0
+word = None
 
 # input comes from STDIN
 for line in sys.stdin:
@@ -20,12 +22,18 @@ for line in sys.stdin:
         # ignore/discard this line
         continue
 
-    # Dodaj słowo i jego długość do listy wyników
-    results.append((word, len(word), count))
+    # this IF-switch only works because Hadoop sorts map output
+    # by key (here: word) before it is passed to the reducer
+    if current_word == word:
+        current_count += count
+    else:
+        if current_word:
+            # write result to STDOUT
+            print('%s\t%s' % (current_word, current_count))
+        current_count = count
+        current_word = word
 
-# Posortuj wyniki po długości słowa
-results.sort(key=lambda x: x[1])
+# Flush the last word. Test current_word itself, not `current_word == word`: that printed "None\t0" on empty input and (presumably) lost the total when the final line was discarded after rebinding `word`.
+if current_word is not None:
+    print('%s\t%s' % (current_word, current_count))
 
-# Wypisz posortowane wyniki
-for result in results:
-    print('%s\t%s' % (result[0], result[2]))