diff --git a/main.ipynb b/main.ipynb index dfa1a6f..ec281ab 100644 --- a/main.ipynb +++ b/main.ipynb @@ -24,7 +24,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 26, + "execution_count": 17, "metadata": { "tags": [] }, @@ -33,7 +33,7 @@ "import pandas as pd\n", "\n", "dev_data = list()\n", - "directory = 'dev-0'\n", + "directory = 'test-A'\n", "data_path = directory+'/in.tsv'\n", "expected_path = directory+'/expected.tsv'\n", "out_path = directory+'/out.tsv'\n", @@ -42,15 +42,15 @@ " for line in f.readlines():\n", " dev_data.append(line.split('\\t')[-2])\n", "\n", - "dev_expected = list()\n", - "with open(expected_path, \"r\") as f:\n", - " for line in f.readlines():\n", - " dev_expected.append(line.replace('\\n',''))" + "# dev_expected = list()\n", + "# with open(expected_path, \"r\") as f:\n", + "# for line in f.readlines():\n", + "# dev_expected.append(line.replace('\\n',''))" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -63,19 +63,19 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "def calcProbability(bigram, unigramCounts, bigramCounts, listOfProb):\n", " word1 = bigram[0]\n", " word2 = bigram[1]\n", - " listOfProb[bigram] = (bigramCounts.get(bigram, 0))/(unigramCounts.get(word1, 0))\n" + " listOfProb[bigram] = ((bigramCounts.get(bigram, 0))/len(bigramCounts.items()))/((unigramCounts.get(word1, 0))/len(unigramCounts.items()))\n" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -132,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -147,15 +147,15 @@ " word_probs = dict(sorted(word_probs.items(), key=lambda item: item[1], reverse=True))\n", " rest = 1.0 - sum(word_probs.values())\n", " word_probs = list(map(lambda elem: elem[0][1] + \":\" + '{:.7f}'.format(elem[1]), list(word_probs.items())))\n", - " word_probs.append(':'+ '{:.7f}'.format(rest))\n", - " word_probs.append('\\n')\n", + " word_probs.append(':'+'{:.7f}'.format(rest))\n", " word_probs = ' '.join(word_probs)\n", + " word_probs += '\\n'\n", " f.write(word_probs)" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 16, "metadata": { "tags": [] },