#!/bin/bash
#
# bigrams_freq.sh - count how often each word bigram occurs in a
# bzip2-compressed text.
#
# Provenance: analysis/bigrams_freq.sh, added by commit
# d6be2a2ce998f492ddf347a3f7e5fec18627815b (Kuba, 2023-03-21,
# "calculate frequency of bigrams in text").
#
# Usage: bigrams_freq.sh INPUT.bz2 [OUTPUT]
#   INPUT.bz2  bzip2-compressed text to analyse
#   OUTPUT     file to write the table to (default: bigram_freq.txt)
#
# NOTE(review): in the patch text this script was recovered from, the awk
# program was cut off after "for(i=1;i" and everything up to the final
# "bigram_freq.txt" was missing. The counting/sorting stage below
# ("sort | uniq -c | sort") is a reconstruction - confirm the expected
# output format against any consumer of bigram_freq.txt.

# errexit is deliberately not enabled: every failure path is checked
# explicitly and main's status becomes the script's exit status.
set -uo pipefail

usage() {
  printf 'Usage: %s INPUT.bz2 [OUTPUT]\n' "${0##*/}" >&2
}

#######################################
# Count adjacent word pairs in plain text.
# A "word" is a maximal run of alphabetic characters, lowercased; every
# other character (including newlines) only separates words.
# Arguments: none
# Inputs:    plain text on stdin
# Outputs:   one "COUNT WORD1 WORD2" line per distinct bigram on stdout
#            (COUNT is left-padded by uniq -c), highest count first,
#            ties ordered by bigram
# Returns:   non-zero if any pipeline stage fails
#######################################
count_bigrams() {
  # One word per line instead of fixed-width folding: a fixed-width fold
  # cuts words in half at the fold column and drops the bigram that spans
  # each fold boundary.
  tr '[:upper:]' '[:lower:]' \
    | tr -cs '[:alpha:]' '\n' \
    | awk '$0 != "" { if (seen) print prev, $0; prev = $0; seen = 1 }' \
    | LC_ALL=C sort \
    | LC_ALL=C uniq -c \
    | LC_ALL=C sort -k1,1nr -k2
}

#######################################
# Decompress the input and write its bigram table.
# Arguments: $1 - path to a bzip2-compressed text file
#            $2 - output path (optional, default bigram_freq.txt)
# Outputs:   writes the table to the output file; diagnostics to stderr
# Returns:   0 on success, 2 on usage error, 1 on any other failure
#######################################
main() {
  if (( $# < 1 || $# > 2 )); then
    usage
    return 2
  fi

  local input=$1
  local output=${2:-bigram_freq.txt}

  if [[ ! -r "$input" ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
    return 1
  fi

  # pipefail makes a bzcat failure (e.g. corrupt archive) visible here.
  if ! bzcat -- "$input" | count_bigrams > "$output"; then
    printf '%s: failed to process %s\n' "${0##*/}" "$input" >&2
    return 1
  fi
}

main "$@"