fixed stuff
This commit is contained in:
parent
db6c660600
commit
45f267d3c1
32
main.ipynb
32
main.ipynb
@ -24,7 +24,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": 17,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
@ -33,7 +33,7 @@
|
|||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"\n",
|
"\n",
|
||||||
"dev_data = list()\n",
|
"dev_data = list()\n",
|
||||||
"directory = 'dev-0'\n",
|
"directory = 'test-A'\n",
|
||||||
"data_path = directory+'/in.tsv'\n",
|
"data_path = directory+'/in.tsv'\n",
|
||||||
"expected_path = directory+'/expected.tsv'\n",
|
"expected_path = directory+'/expected.tsv'\n",
|
||||||
"out_path = directory+'/out.tsv'\n",
|
"out_path = directory+'/out.tsv'\n",
|
||||||
@ -42,15 +42,15 @@
|
|||||||
" for line in f.readlines():\n",
|
" for line in f.readlines():\n",
|
||||||
" dev_data.append(line.split('\\t')[-2])\n",
|
" dev_data.append(line.split('\\t')[-2])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"dev_expected = list()\n",
|
"# dev_expected = list()\n",
|
||||||
"with open(expected_path, \"r\") as f:\n",
|
"# with open(expected_path, \"r\") as f:\n",
|
||||||
" for line in f.readlines():\n",
|
"# for line in f.readlines():\n",
|
||||||
" dev_expected.append(line.replace('\\n',''))"
|
"# dev_expected.append(line.replace('\\n',''))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 18,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -63,19 +63,19 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 28,
|
"execution_count": 19,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def calcProbability(bigram, unigramCounts, bigramCounts, listOfProb):\n",
|
"def calcProbability(bigram, unigramCounts, bigramCounts, listOfProb):\n",
|
||||||
" word1 = bigram[0]\n",
|
" word1 = bigram[0]\n",
|
||||||
" word2 = bigram[1]\n",
|
" word2 = bigram[1]\n",
|
||||||
" listOfProb[bigram] = (bigramCounts.get(bigram, 0))/(unigramCounts.get(word1, 0))\n"
|
" listOfProb[bigram] = ((bigramCounts.get(bigram, 0))/len(bigramCounts.items()))/((unigramCounts.get(word1, 0))/len(unigramCounts.items()))\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": 20,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -114,7 +114,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -123,7 +123,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 31,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -132,7 +132,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 32,
|
"execution_count": 15,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -147,15 +147,15 @@
|
|||||||
" word_probs = dict(sorted(word_probs.items(), key=lambda item: item[1], reverse=True))\n",
|
" word_probs = dict(sorted(word_probs.items(), key=lambda item: item[1], reverse=True))\n",
|
||||||
" rest = 1.0 - sum(word_probs.values())\n",
|
" rest = 1.0 - sum(word_probs.values())\n",
|
||||||
" word_probs = list(map(lambda elem: elem[0][1] + \":\" + '{:.7f}'.format(elem[1]), list(word_probs.items())))\n",
|
" word_probs = list(map(lambda elem: elem[0][1] + \":\" + '{:.7f}'.format(elem[1]), list(word_probs.items())))\n",
|
||||||
" word_probs.append(':'+ '{:.7f}'.format(rest))\n",
|
" word_probs.append(':'+'{:.7f}'.format(rest))\n",
|
||||||
" word_probs.append('\\n')\n",
|
|
||||||
" word_probs = ' '.join(word_probs)\n",
|
" word_probs = ' '.join(word_probs)\n",
|
||||||
|
" word_probs += '\\n'\n",
|
||||||
" f.write(word_probs)"
|
" f.write(word_probs)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 33,
|
"execution_count": 16,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user