hadoop_sorted/mr/python/wordcount.sh
Paweł Skurzyński 75238b5071 first
2024-04-05 17:14:07 +02:00

13 lines
437 B
Bash

# Runs the Python word-count job through Hadoop Streaming and prints the result.
#
# Steps: make sure the HDFS working directory exists, remove the output of any
# previous run (a MapReduce job will not start if its output directory already
# exists), submit the streaming job, then print every reducer's part file.
#
# Optional environment overrides (defaults reproduce the original behaviour):
#   WORDCOUNT_SRC_DIR   local directory that holds mapper.py and reducer.py
#   WORDCOUNT_REDUCERS  number of reduce tasks

# Abort on the first failing command or on use of an unset variable, so that a
# failed job is never followed by a dump of stale or missing output.
set -eu

src_dir="${WORDCOUNT_SRC_DIR:-$HOME/hadoop_zaliczenie/mr/python}"
reducers="${WORDCOUNT_REDUCERS:-2}"

# HDFS paths are relative, exactly as in the original invocation.
input_dir='tmp/books'
work_dir='tmp/python'
output_dir="${work_dir}/output"

hdfs dfs -mkdir -p "$work_dir"

# -f: a missing output directory (first run) is not an error.
hdfs dfs -rm -r -f "$output_dir"

# Generic options (-D, -files) have to come before the streaming options.
# -files ships both scripts to the tasks' working directory, so the mapper and
# reducer are referenced by bare file name rather than by a local absolute
# path that would have to exist on every worker node.
yarn jar /usr/lib/hadoop/hadoop-streaming.jar \
  -D mapreduce.job.reduces="$reducers" \
  -files "${src_dir}/mapper.py,${src_dir}/reducer.py" \
  -input "$input_dir" \
  -output "$output_dir" \
  -mapper mapper.py \
  -reducer reducer.py

# The glob is quoted so that HDFS expands it, not the local shell.
hdfs dfs -cat "${output_dir}/part-*"