Count the number of occurrences of each word in a text

This commit is contained in:
Kuba 2023-03-21 22:32:40 +01:00
parent 6dd47bcdb5
commit b2b6e0ad30
1 changed files with 3 additions and 0 deletions

3
analysis/word_freq.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
#
# Count the number of occurrences of each word in a bzip2-compressed text.
#
# Usage: word_freq.sh FILE.bz2
#
# Output: word_freq.txt in the current directory, one "COUNT WORD" line per
# distinct word (uniq -c format), sorted by descending count.
# Exit status: 0 on success, 2 when FILE is missing from the command line,
# non-zero when FILE cannot be read or any pipeline stage fails.

# Make a pipeline report the failure of any stage (e.g. bzcat on a corrupt
# archive) instead of only the status of the final sort.
set -o pipefail

#######################################
# Split text into words and count how often each one occurs.
# Inputs:  plain text on stdin
# Outputs: "COUNT WORD" lines on stdout, most frequent first
# Returns: 0 on success (including empty input), non-zero on tool failure
#######################################
count_words() {
  # tr puts every token on its own line; punctuation and whitespace act as
  # separators and runs of them are squeezed into a single newline.
  #
  # grep exits with 1 when no line matches; that is a valid (empty) result
  # here and must not be reported as a failure under pipefail.
  #
  # NOTE(review): the grep pattern is kept byte-for-byte from the original
  # script. In a POSIX bracket expression a backslash is an ordinary
  # character, so "\x00-\x7F" is NOT the hex range it looks like, and the
  # filter does not select on ASCII vs. non-ASCII as the pattern suggests
  # (in the C locale, for example, every token containing a digit is
  # dropped). Confirm the intended definition of a "word" before changing it.
  tr -s '[:punct:][:space:]' '\n' \
    | { grep -E '^[^\x00-\x7F]*[[:alpha:]][^\x00-\x7F]*$' || [[ $? -eq 1 ]]; } \
    | sort \
    | uniq -c \
    | sort -nr
}

#######################################
# Entry point: decompress the given file and write the word counts.
# Arguments: $1 - path to a bzip2-compressed text file
# Outputs:   writes word_freq.txt; diagnostics go to stderr
# Returns:   0 on success, 2 on usage error, non-zero on other failures
#######################################
main() {
  local -r output='word_freq.txt'
  local input

  if (( $# < 1 )); then
    printf 'Usage: %s FILE.bz2\n' "${0##*/}" >&2
    return 2
  fi
  input=$1

  # Check before opening the output so a bad path does not truncate an
  # existing result file.
  if [[ ! -r "$input" ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
    return 1
  fi

  # Quote the path (spaces, glob characters) and use -- so a name starting
  # with '-' is not parsed as an option.
  bzcat -- "$input" | count_words > "$output"
}

main "$@"