Switch input to test-A, comment out expected-output loading, rescale bigram probability by count-table sizes

2022-04-11 17:07:47 +02:00 · 2022-04-11 17:07:47 +02:00 · 45f267d3c1
commit 45f267d3c1
parent db6c660600
1 changed file with 16 additions and 16 deletions
--- a/main.ipynb
+++ b/main.ipynb
@ -24,7 +24,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 17,
   "metadata": {
    "tags": []
   },
@ -33,7 +33,7 @@
    "import pandas as pd\n",
    "\n",
    "dev_data = list()\n",
-    "directory = 'dev-0'\n",
+    "directory = 'test-A'\n",
    "data_path = directory+'/in.tsv'\n",
    "expected_path = directory+'/expected.tsv'\n",
    "out_path = directory+'/out.tsv'\n",
@ -42,15 +42,15 @@
    "    for line in f.readlines():\n",
    "        dev_data.append(line.split('\\t')[-2])\n",
    "\n",
-    "dev_expected = list()\n",
+    "# dev_expected = list()\n",
-    "with open(expected_path, \"r\") as f:\n",
+    "# with open(expected_path, \"r\") as f:\n",
-    "    for line in f.readlines():\n",
+    "#     for line in f.readlines():\n",
-    "        dev_expected.append(line.replace('\\n',''))"
+    "#         dev_expected.append(line.replace('\\n',''))"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
@ -63,19 +63,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calcProbability(bigram, unigramCounts, bigramCounts, listOfProb):\n",
    "    word1 = bigram[0]\n",
    "    word2 = bigram[1]\n",
-    "    listOfProb[bigram] = (bigramCounts.get(bigram, 0))/(unigramCounts.get(word1, 0))\n"
+    "    listOfProb[bigram] = ((bigramCounts.get(bigram, 0))/len(bigramCounts.items()))/((unigramCounts.get(word1, 0))/len(unigramCounts.items()))\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
@ -114,7 +114,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
@ -123,7 +123,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
@ -132,7 +132,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
@ -147,15 +147,15 @@
    "            word_probs = dict(sorted(word_probs.items(), key=lambda item: item[1], reverse=True))\n",
    "            rest = 1.0 - sum(word_probs.values())\n",
    "            word_probs = list(map(lambda elem: elem[0][1] + \":\"  + '{:.7f}'.format(elem[1]), list(word_probs.items())))\n",
-    "            word_probs.append(':'+ '{:.7f}'.format(rest))\n",
+    "            word_probs.append(':'+'{:.7f}'.format(rest))\n",
    "            word_probs.append('\\n')\n",
    "            word_probs = ' '.join(word_probs)\n",
    "            word_probs += '\\n'\n",
    "            f.write(word_probs)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 16,
   "metadata": {
    "tags": []
   },