interpolate
This commit is contained in:
parent
7fd6b19be4
commit
db6c660600
44
main.ipynb
44
main.ipynb
@ -24,17 +24,14 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"columns = ['FileId','Paper', 'Idk1', 'Year','Idk2','Idk3', 'LeftContext', 'RightContext']\n",
|
||||
"\n",
|
||||
"# dev_data = pd.read_csv('dev-0/in.tsv', sep='\\t', names=columns, engine='python', quotechar='\"', error_bad_lines=False)\n",
|
||||
"# dev_expected = pd.read_csv('dev-0/expected.tsv', sep='\\t', engine='python', quotechar='\"', error_bad_lines=False)\n",
|
||||
"dev_data = list()\n",
|
||||
"directory = 'dev-0'\n",
|
||||
"data_path = directory+'/in.tsv'\n",
|
||||
@ -53,7 +50,32 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
def interpolate(bigram, unigramCounts, bigramCounts, listOfProb,
                lambdaValue=0.4, totalCount=None):
    """Store the linearly interpolated probability of *bigram* in *listOfProb*.

    The estimate mixes the bigram and unigram maximum-likelihood estimates:

        P(w2 | w1) = lambda * c(w1, w2) / c(w1) + (1 - lambda) * c(w2) / N

    so both weights sum to one and the result stays within [0, 1].

    Args:
        bigram: Indexable pair ``(word1, word2)``; also used as the key that
            is written into ``listOfProb``.
        unigramCounts: Mapping of word -> occurrence count.
        bigramCounts: Mapping of bigram -> occurrence count.
        listOfProb: Mapping updated in place (it is used as a dict despite
            its name); ``listOfProb[bigram]`` receives the probability.
        lambdaValue: Weight of the bigram term; the unigram term gets
            ``1 - lambdaValue``. Defaults to 0.4.
        totalCount: Total number of tokens ``N``. When omitted it is computed
            as ``sum(unigramCounts.values())``; pass it explicitly when
            calling this in a loop to avoid re-summing on every call.

    Returns:
        None. The result is written into ``listOfProb``.
    """
    word1 = bigram[0]
    word2 = bigram[1]

    if totalCount is None:
        totalCount = sum(unigramCounts.values())

    # An unseen history word (or an empty corpus) contributes probability 0
    # instead of raising ZeroDivisionError.
    historyCount = unigramCounts.get(word1, 0)
    bigramProb = bigramCounts.get(bigram, 0) / historyCount if historyCount else 0.0
    unigramProb = unigramCounts.get(word2, 0) / totalCount if totalCount else 0.0

    listOfProb[bigram] = lambdaValue * bigramProb + (1 - lambdaValue) * unigramProb
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
def calcProbability(bigram, unigramCounts, bigramCounts, listOfProb):
    """Store the maximum-likelihood estimate of *bigram* in *listOfProb*.

    The estimate is ``c(word1, word2) / c(word1)``, where ``word1`` is
    ``bigram[0]``.

    Args:
        bigram: Indexable pair ``(word1, word2)``; also used as the key that
            is written into ``listOfProb``.
        unigramCounts: Mapping of word -> occurrence count.
        bigramCounts: Mapping of bigram -> occurrence count.
        listOfProb: Mapping updated in place (it is used as a dict despite
            its name); ``listOfProb[bigram]`` receives the probability.

    Returns:
        None. The result is written into ``listOfProb``.
    """
    historyCount = unigramCounts.get(bigram[0], 0)
    if historyCount:
        listOfProb[bigram] = bigramCounts.get(bigram, 0) / historyCount
    else:
        # Unseen history word: report probability 0 rather than dividing by 0.
        listOfProb[bigram] = 0.0
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -86,15 +108,13 @@
|
||||
def calcBigramProb(listOfBigrams, unigramCounts, bigramCounts):
    """Return a dict mapping every bigram in *listOfBigrams* to its probability.

    Each probability is computed by ``calcProbability``, which writes
    ``listOfProb[bigram]`` for the bigram it is given.

    Args:
        listOfBigrams: Iterable of bigrams; each is used as a dict key.
        unigramCounts: Mapping of word -> occurrence count.
        bigramCounts: Mapping of bigram -> occurrence count.

    Returns:
        Dict of bigram -> probability. A bigram that occurs more than once
        in ``listOfBigrams`` yields a single entry.
    """
    listOfProb = {}
    for bigram in listOfBigrams:
        # The former inline estimate (bigram count / total unigram count) was
        # overwritten by this call on every iteration, so it has been removed
        # together with the word1/word2 locals it left unused.
        calcProbability(bigram, unigramCounts, bigramCounts, listOfProb)
    return listOfProb
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -103,7 +123,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -112,7 +132,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -135,7 +155,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"execution_count": 33,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user