4.5 Klasyfikacja

2022-04-20 21:00:41 +02:00 · 2022-04-20 21:00:41 +02:00 · 31c2a80b77
commit 31c2a80b77
parent ad03ee53c1
7 changed files with 428754 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,3 @@
 *~
+
+train/in.tsv
--- a/dev-0/out.tsv
+++ b/dev-0/out.tsv
--- a/dev-1/out.tsv
+++ b/dev-1/out.tsv
--- a/1
+++ b/1
@ -1 +0,0 @@
-Subproject commit b775a221e6107d8a0f9638d36f3561d7c7d7c18b
--- a/run.ipynb
+++ b/run.ipynb
@ -0,0 +1,149 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 535,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import random"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 536,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('dev-0/in.tsv', 'r') as f:\n",
+    "    dev_x = f.readlines()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 538,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m_vocabulary = ['komputer', 'komputerze', 'aucie', 'auto', 'samochód', 'samochodzie', 'piwie', 'piwo', 'alkoholu', 'alkohol', 'żonie', 'żona', 'xboxie', 'xbox', 'co', 'e', 'XD', 'stary', 'staremu']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 539,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f_vocabulary = ['zakupy', 'zakupach', 'mężem', 'mąż', 'nasze', 'my', 'dzieckiem', 'dziecko', 'domu', 'dom', 'mieszkaniu', 'mieszkanie', 'kocham', 'kocha', 'chłopakowai', 'chłopak', 'haha', 'boże', 'uh', 'uhh', \":)\", 'mama', 'mamie', 'włosy']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 540,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict(text):\n",
+    "    score = 0\n",
+    "\n",
+    "    for word in m_vocabulary:\n",
+    "        if word in text:\n",
+    "            score += 1\n",
+    "\n",
+    "    for word in f_vocabulary:\n",
+    "        if word in text:\n",
+    "            score -= 1\n",
+    "\n",
+    "    if score == 0:\n",
+    "        return random.randint(0, 1)\n",
+    "    if score >0:\n",
+    "        return 1\n",
+    "    if score <0:\n",
+    "        return 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 541,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('dev-0/expected.tsv', 'r') as f:\n",
+    "    dev_y = f.readlines()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 542,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dev = zip(dev_x, dev_y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 543,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "correct = 0\n",
+    "incorrect = 0\n",
+    "\n",
+    "with open('dev-0/out.tsv', 'wt') as f:\n",
+    "    for x, y in list(dev):\n",
+    "        f.write(str(predict(x))+'\\n')\n",
+    "        if predict(x) == int(y):\n",
+    "            correct += 1\n",
+    "        else:\n",
+    "            incorrect += 1\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 544,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.5122792230182751"
+      ]
+     },
+     "execution_count": 544,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "correct/(correct+incorrect)"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "8a24ca87d97ac268fc796e79e77f73ca37fd3e060a17758a6f2d8f8d4f13ae6a"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.7 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/run.py
+++ b/run.py
@ -0,0 +1,65 @@
+import random
+
+
+def predict(text):
+    score = 0
+
+    for word in m_vocabulary:
+        if word in text:
+            score += 1
+
+    for word in f_vocabulary:
+        if word in text:
+            score -= 1
+
+    if score == 0:
+        return random.randint(0, 1)
+    if score >0:
+        return 1
+    if score <0:
+        return 0
+
+
+
+m_vocabulary = ['komputer', 'komputerze', 'aucie', 'auto', 'samochód', 'samochodzie', 'piwie', 'piwo', 'alkoholu', 'alkohol', 'żonie', 'żona', 'xboxie', 'xbox', 'co', 'e', 'XD', 'stary', 'staremu']
+
+f_vocabulary = ['zakupy', 'zakupach', 'mężem', 'mąż', 'nasze', 'my', 'dzieckiem', 'dziecko', 'domu', 'dom', 'mieszkaniu', 'mieszkanie', 'kocham', 'kocha', 'chłopakowai', 'chłopak', 'haha', 'boże', 'uh', 'uhh', ":)", 'mama', 'mamie', 'włosy']
+
+
+
+with open('dev-0/in.tsv', 'r', encoding='utf8') as f:
+    dev0_x = f.readlines()
+
+with open('dev-0/expected.tsv', 'r', encoding='utf8') as f:
+    dev0_y = f.readlines()
+
+dev0 = zip(dev0_x, dev0_y)
+
+with open('dev-0/out.tsv', 'wt') as f:
+    for x, y in list(dev0):
+        f.write(str(predict(x))+'\n')
+
+
+with open('dev-1/in.tsv', 'r', encoding='utf8') as f:
+    dev1_x = f.readlines()
+
+with open('dev-1/expected.tsv', 'r', encoding='utf8') as f:
+    dev1_y = f.readlines()
+
+dev1 = zip(dev1_x, dev1_y)
+
+with open('dev-1/out.tsv', 'wt') as f:
+    for x, y in list(dev1):
+        f.write(str(predict(x))+'\n')
+
+
+with open('test-A/in.tsv', 'r', encoding='utf8') as f:
+    testA_x = f.readlines()
+
+with open('test-A/out.tsv', 'wt') as f:
+    for x in list(testA_x):
+        f.write(str(predict(x))+'\n')
+
+
+
+
--- a/test-A/out.tsv
+++ b/test-A/out.tsv
				`@ -1 +0,0 @@`
				`Subproject commit b775a221e6107d8a0f9638d36f3561d7c7d7c18b`