interpolate
This commit is contained in:
parent
7fd6b19be4
commit
db6c660600
44
main.ipynb
44
main.ipynb
@ -24,17 +24,14 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 26,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"columns = ['FileId','Paper', 'Idk1', 'Year','Idk2','Idk3', 'LeftContext', 'RightContext']\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# dev_data = pd.read_csv('dev-0/in.tsv', sep='\\t', names=columns, engine='python', quotechar='\"', error_bad_lines=False)\n",
|
|
||||||
"# dev_expected = pd.read_csv('dev-0/expected.tsv', sep='\\t', engine='python', quotechar='\"', error_bad_lines=False)\n",
|
|
||||||
"dev_data = list()\n",
|
"dev_data = list()\n",
|
||||||
"directory = 'dev-0'\n",
|
"directory = 'dev-0'\n",
|
||||||
"data_path = directory+'/in.tsv'\n",
|
"data_path = directory+'/in.tsv'\n",
|
||||||
@ -53,7 +50,32 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def interpolate(bigram, unigramCounts, bigramCounts, listOfProb):\n",
|
||||||
|
" lambdaValue = 0.4\n",
|
||||||
|
" word1 = bigram[0]\n",
|
||||||
|
" word2 = bigram[1]\n",
|
||||||
|
" listOfProb[bigram] = (bigramCounts.get(bigram, 0))/(unigramCounts.get(word1, 0)) + (1-lambdaValue)*(unigramCounts.get(word2, 0))/(unigramCounts.get(word1, 0))\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 28,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def calcProbability(bigram, unigramCounts, bigramCounts, listOfProb):\n",
|
||||||
|
" word1 = bigram[0]\n",
|
||||||
|
" word2 = bigram[1]\n",
|
||||||
|
" listOfProb[bigram] = (bigramCounts.get(bigram, 0))/(unigramCounts.get(word1, 0))\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 29,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -86,15 +108,13 @@
|
|||||||
"def calcBigramProb(listOfBigrams, unigramCounts, bigramCounts):\n",
|
"def calcBigramProb(listOfBigrams, unigramCounts, bigramCounts):\n",
|
||||||
" listOfProb = {}\n",
|
" listOfProb = {}\n",
|
||||||
" for bigram in listOfBigrams:\n",
|
" for bigram in listOfBigrams:\n",
|
||||||
" word1 = bigram[0]\n",
|
" calcProbability(bigram, unigramCounts, bigramCounts, listOfProb)\n",
|
||||||
" word2 = bigram[1]\n",
|
|
||||||
" listOfProb[bigram] = (bigramCounts.get(bigram))/(sum(unigramCounts.values()))\n",
|
|
||||||
" return listOfProb"
|
" return listOfProb"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 30,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -103,7 +123,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 31,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -112,7 +132,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 51,
|
"execution_count": 32,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -135,7 +155,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 52,
|
"execution_count": 33,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user