From b2b6e0ad3078a4f11c7a17ad659fd2233d74ff32 Mon Sep 17 00:00:00 2001
From: Kuba
Date: Tue, 21 Mar 2023 22:32:40 +0100
Subject: [PATCH] count number of occurrences of each word in text

---
Note: the blob id on the index line below predates this revision of the
script and has not been recomputed.

 analysis/word_freq.sh | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100755 analysis/word_freq.sh

diff --git a/analysis/word_freq.sh b/analysis/word_freq.sh
new file mode 100755
index 0000000..0abbf44
--- /dev/null
+++ b/analysis/word_freq.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Count how often each word occurs in a bzip2-compressed text.
+#
+# Usage: word_freq.sh [FILE.bz2]
+#
+# Reads FILE.bz2, or bzip2-compressed data from stdin when no argument is
+# given, and writes "count word" lines, most frequent first, to
+# word_freq.txt in the current directory.
+
+# Abort on errors and unset variables; pipefail makes a failing bzcat
+# stop the script instead of being masked by the later pipeline stages.
+set -euo pipefail
+
+# Feed the input through a redirect rather than as a bzcat argument, so
+# file names with spaces or a leading dash are handled safely.
+if [[ $# -gt 0 ]]; then
+  exec < "$1"
+fi
+
+# Pipeline: one token per line, split on punctuation and whitespace; keep
+# the tokens accepted by the grep filter; count identical tokens; sort by
+# descending count.
+#
+# NOTE(review): grep -E does not treat \x00-\x7F inside a bracket expression
+# as a hex range, so the filter is probably not the intended non-ASCII test.
+# The pattern is kept unchanged to preserve the current output; confirm the
+# intent before changing it.
+#
+# grep exits with 1 when nothing matches; that is not an error here, so
+# only statuses above 1 are passed on to pipefail.
+bzcat \
+  | tr -s '[:punct:][:space:]' '\n' \
+  | { grep -E '^[^\x00-\x7F]*[[:alpha:]][^\x00-\x7F]*$' || [[ $? -eq 1 ]]; } \
+  | sort \
+  | uniq -c \
+  | sort -nr > word_freq.txt