4.5 Klasyfikacja
This commit is contained in:
parent
ad03ee53c1
commit
31c2a80b77
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1,3 @@
|
|||||||
*~
|
*~
|
||||||
|
|
||||||
|
train/in.tsv
|
||||||
|
137314
dev-0/out.tsv
Normal file
137314
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
156606
dev-1/out.tsv
Normal file
156606
dev-1/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
|||||||
Subproject commit b775a221e6107d8a0f9638d36f3561d7c7d7c18b
|
|
149
run.ipynb
Normal file
149
run.ipynb
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 535,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import random"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 536,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"with open('dev-0/in.tsv', 'r') as f:\n",
|
||||||
|
" dev_x = f.readlines()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 538,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"m_vocabulary = ['komputer', 'komputerze', 'aucie', 'auto', 'samochód', 'samochodzie', 'piwie', 'piwo', 'alkoholu', 'alkohol', 'żonie', 'żona', 'xboxie', 'xbox', 'co', 'e', 'XD', 'stary', 'staremu']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 539,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"f_vocabulary = ['zakupy', 'zakupach', 'mężem', 'mąż', 'nasze', 'my', 'dzieckiem', 'dziecko', 'domu', 'dom', 'mieszkaniu', 'mieszkanie', 'kocham', 'kocha', 'chłopakowai', 'chłopak', 'haha', 'boże', 'uh', 'uhh', \":)\", 'mama', 'mamie', 'włosy']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 540,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def predict(text):\n",
|
||||||
|
" score = 0\n",
|
||||||
|
"\n",
|
||||||
|
" for word in m_vocabulary:\n",
|
||||||
|
" if word in text:\n",
|
||||||
|
" score += 1\n",
|
||||||
|
"\n",
|
||||||
|
" for word in f_vocabulary:\n",
|
||||||
|
" if word in text:\n",
|
||||||
|
" score -= 1\n",
|
||||||
|
"\n",
|
||||||
|
" if score == 0:\n",
|
||||||
|
" return random.randint(0, 1)\n",
|
||||||
|
" if score >0:\n",
|
||||||
|
" return 1\n",
|
||||||
|
" if score <0:\n",
|
||||||
|
" return 0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 541,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"with open('dev-0/expected.tsv', 'r') as f:\n",
|
||||||
|
" dev_y = f.readlines()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 542,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dev = zip(dev_x, dev_y)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 543,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"correct = 0\n",
"incorrect = 0\n",
"\n",
"# Write one prediction per dev-0 input line and score it against the label.\n",
"with open('dev-0/out.tsv', 'wt') as f:\n",
"    for x, y in dev:\n",
"        # Predict exactly once per row: predict() breaks ties randomly, so a\n",
"        # second call could return a different label than the one written.\n",
"        prediction = predict(x)\n",
"        f.write(str(prediction)+'\\n')\n",
"        if prediction == int(y):\n",
"            correct += 1\n",
"        else:\n",
"            incorrect += 1\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 544,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"0.5122792230182751"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 544,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"correct/(correct+incorrect)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"interpreter": {
|
||||||
|
"hash": "8a24ca87d97ac268fc796e79e77f73ca37fd3e060a17758a6f2d8f8d4f13ae6a"
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.9.7 ('base')",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.7"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 4
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
65
run.py
Normal file
65
run.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import random
|
||||||
|
|
||||||
|
|
||||||
|
# Substrings whose presence in a text counts as evidence for label 1.
# NOTE(review): matching is by substring, not by token, so very short entries
# such as 'e' and 'co' fire on most texts, and an inflected form that extends
# its base form ('komputer' / 'komputerze') counts twice. Confirm this is
# the intended weighting.
m_vocabulary = [
    'komputer', 'komputerze', 'aucie', 'auto', 'samochód', 'samochodzie',
    'piwie', 'piwo', 'alkoholu', 'alkohol', 'żonie', 'żona', 'xboxie',
    'xbox', 'co', 'e', 'XD', 'stary', 'staremu',
]

# Substrings whose presence in a text counts as evidence for label 0.
# 'chłopakowi' was previously misspelled 'chłopakowai', which can never match.
f_vocabulary = [
    'zakupy', 'zakupach', 'mężem', 'mąż', 'nasze', 'my', 'dzieckiem',
    'dziecko', 'domu', 'dom', 'mieszkaniu', 'mieszkanie', 'kocham', 'kocha',
    'chłopakowi', 'chłopak', 'haha', 'boże', 'uh', 'uhh', ":)", 'mama',
    'mamie', 'włosy',
]


def predict(text):
    """Classify *text* as 1 or 0 by counting vocabulary hits.

    Every entry of ``m_vocabulary`` that occurs as a substring of *text*
    adds one to the score; every entry of ``f_vocabulary`` that occurs
    subtracts one. Matching is case-sensitive.

    Args:
        text: The string to classify.

    Returns:
        1 if the score is positive, 0 if it is negative, and a uniformly
        random choice between 0 and 1 when the score is exactly zero.
    """
    m_hits = sum(word in text for word in m_vocabulary)
    f_hits = sum(word in text for word in f_vocabulary)
    score = m_hits - f_hits

    if score > 0:
        return 1
    if score < 0:
        return 0
    # Tie: the vocabularies give no evidence either way, so flip a coin.
    return random.randint(0, 1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _write_predictions(in_path, out_path):
    """Write one prediction line to *out_path* for each line of *in_path*.

    Each input line (including its trailing newline) is passed to
    ``predict`` and the returned label is written on its own line.

    The expected-label files are deliberately not read: the labels were
    never used here, and zipping the inputs with them silently dropped
    predictions whenever the label file had fewer lines than the input.

    Args:
        in_path: Path of the UTF-8 input file, one example per line.
        out_path: Path of the output file; overwritten if it exists.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(in_path, 'r', encoding='utf8') as src, \
            open(out_path, 'wt', encoding='utf8') as dst:
        for line in src:
            dst.write(str(predict(line)) + '\n')


# Produce out.tsv next to in.tsv for every data split, in the original order.
for split in ('dev-0', 'dev-1', 'test-A'):
    _write_predictions(split + '/in.tsv', split + '/out.tsv')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
134618
test-A/out.tsv
Normal file
134618
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user