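Switch data directory to test-A, skip loading expected.tsv, normalize bigram/unigram counts in calcProbability, and fix trailing space before newline in out.tsv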

This commit is contained in:
Ramon Dyzman 2022-04-11 17:07:47 +02:00
parent db6c660600
commit 45f267d3c1

View File

@ -24,7 +24,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 26, "execution_count": 17,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
@ -33,7 +33,7 @@
"import pandas as pd\n", "import pandas as pd\n",
"\n", "\n",
"dev_data = list()\n", "dev_data = list()\n",
"directory = 'dev-0'\n", "directory = 'test-A'\n",
"data_path = directory+'/in.tsv'\n", "data_path = directory+'/in.tsv'\n",
"expected_path = directory+'/expected.tsv'\n", "expected_path = directory+'/expected.tsv'\n",
"out_path = directory+'/out.tsv'\n", "out_path = directory+'/out.tsv'\n",
@ -42,15 +42,15 @@
" for line in f.readlines():\n", " for line in f.readlines():\n",
" dev_data.append(line.split('\\t')[-2])\n", " dev_data.append(line.split('\\t')[-2])\n",
"\n", "\n",
"dev_expected = list()\n", "# dev_expected = list()\n",
"with open(expected_path, \"r\") as f:\n", "# with open(expected_path, \"r\") as f:\n",
" for line in f.readlines():\n", "# for line in f.readlines():\n",
" dev_expected.append(line.replace('\\n',''))" "# dev_expected.append(line.replace('\\n',''))"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -63,19 +63,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 19,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def calcProbability(bigram, unigramCounts, bigramCounts, listOfProb):\n", "def calcProbability(bigram, unigramCounts, bigramCounts, listOfProb):\n",
" word1 = bigram[0]\n", " word1 = bigram[0]\n",
" word2 = bigram[1]\n", " word2 = bigram[1]\n",
" listOfProb[bigram] = (bigramCounts.get(bigram, 0))/(unigramCounts.get(word1, 0))\n" " listOfProb[bigram] = ((bigramCounts.get(bigram, 0))/len(bigramCounts.items()))/((unigramCounts.get(word1, 0))/len(unigramCounts.items()))\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 20,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -114,7 +114,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -123,7 +123,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -132,7 +132,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -148,14 +148,14 @@
" rest = 1.0 - sum(word_probs.values())\n", " rest = 1.0 - sum(word_probs.values())\n",
" word_probs = list(map(lambda elem: elem[0][1] + \":\" + '{:.7f}'.format(elem[1]), list(word_probs.items())))\n", " word_probs = list(map(lambda elem: elem[0][1] + \":\" + '{:.7f}'.format(elem[1]), list(word_probs.items())))\n",
" word_probs.append(':'+'{:.7f}'.format(rest))\n", " word_probs.append(':'+'{:.7f}'.format(rest))\n",
" word_probs.append('\\n')\n",
" word_probs = ' '.join(word_probs)\n", " word_probs = ' '.join(word_probs)\n",
" word_probs += '\\n'\n",
" f.write(word_probs)" " f.write(word_probs)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 16,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },