hadoop_zaliczenie/mr/python/mapper.py

15 lines
252 B
Python

#!/usr/bin/env python
import sys
import re
# Patterns are compiled once at import time instead of being looked up
# again for every line/token inside the hot loop.
_TOKEN_RE = re.compile(r'\b\w+\b')
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9]')


def _normalized_words(line):
    """Yield each token of *line* reduced to lower-case ASCII letters/digits.

    Tokens are runs of ``\\w`` characters. Because ``\\w`` also matches ``_``
    (and non-ASCII letters on Python 3), a token can become empty once the
    non-alphanumeric characters are removed; such tokens are skipped so the
    mapper never emits a record with an empty key.
    """
    for token in _TOKEN_RE.findall(line.strip()):
        word = _NON_ALNUM_RE.sub('', token).lower()
        if word:
            yield word


def main(stdin=None, stdout=None):
    """Read text lines and write one ``word<TAB>1`` record per word.

    Args:
        stdin: Iterable of text lines; defaults to ``sys.stdin``.
        stdout: Object with a ``write`` method; defaults to ``sys.stdout``.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    for line in stdin:
        for word in _normalized_words(line):
            # %-formatting keeps the script runnable on whatever interpreter
            # the "python" shebang resolves to.
            stdout.write('%s\t%s\n' % (word, 1))


if __name__ == '__main__':
    main()