diff --git a/main.ipynb b/main.ipynb index 69945b3..b07eef2 100644 --- a/main.ipynb +++ b/main.ipynb @@ -24,7 +24,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 88, + "execution_count": 1, "metadata": { "tags": [] }, @@ -36,9 +36,10 @@ "# dev_data = pd.read_csv('dev-0/in.tsv', sep='\\t', names=columns, engine='python', quotechar='\"', error_bad_lines=False)\n", "# dev_expected = pd.read_csv('dev-0/expected.tsv', sep='\\t', engine='python', quotechar='\"', error_bad_lines=False)\n", "dev_data = list()\n", - "data_path = 'dev-0/in.tsv'\n", - "expected_path = 'dev-0/expected.tsv'\n", - "out_path = 'dev-0/out.tsv'\n", + "directory = 'dev-0'\n", + "data_path = directory+'/in.tsv'\n", + "expected_path = directory+'/expected.tsv'\n", + "out_path = directory+'/out.tsv'\n", "\n", "with open(data_path, \"r\") as f:\n", " for line in f.readlines():\n", @@ -52,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -93,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -102,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -111,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -123,16 +124,18 @@ " word = word.lower()\n", " word = re.sub('\\W+','', word)\n", " word_probs = dict(filter(lambda elem: elem[0][0] == word, probs.items()))\n", + " word_probs = dict(sorted(word_probs.items(), key=lambda item: item[1], reverse=True))\n", " rest = 1.0 - sum(word_probs.values())\n", - " word_probs = list(map(lambda elem: elem[0][0] + \":\" + str(elem[1]), list(word_probs.items())))\n", - " word_probs.append(':'+str(rest))\n", + " word_probs = list(map(lambda elem: elem[0][1] + \":\" + '{:.7f}'.format(elem[1]), list(word_probs.items())))\n", + " word_probs.append(':'+ '{:.7f}'.format(rest))\n", + " word_probs.append('\\n')\n", " word_probs = ' '.join(word_probs)\n", " f.write(word_probs)" ] }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 52, "metadata": { "tags": [] }, @@ -140,6 +143,13 @@ "source": [ "save_results(probs, dev_data)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ] } \ No newline at end of file