petite-difference-challenge2/run.ipynb
2022-04-20 21:00:41 +02:00

150 lines
3.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 535,
"metadata": {},
"outputs": [],
"source": [
"import random"
]
},
{
"cell_type": "code",
"execution_count": 536,
"metadata": {},
"outputs": [],
"source": [
"# Read the dev-0 inputs as a list of lines (readlines keeps the trailing newline).\n",
"# The encoding is explicit because open() otherwise uses the platform default,\n",
"# which is not UTF-8 everywhere; assumes in.tsv is UTF-8 - confirm.\n",
"with open('dev-0/in.tsv', 'r', encoding='utf-8') as f:\n",
"    dev_x = f.readlines()"
]
},
{
"cell_type": "code",
"execution_count": 538,
"metadata": {},
"outputs": [],
"source": [
"# Keywords whose presence in a text adds one point towards class 1 in predict().\n",
"m_vocabulary = [\n",
"    'komputer', 'komputerze',\n",
"    'aucie', 'auto',\n",
"    'samochód', 'samochodzie',\n",
"    'piwie', 'piwo',\n",
"    'alkoholu', 'alkohol',\n",
"    'żonie', 'żona',\n",
"    'xboxie', 'xbox',\n",
"    'co', 'e', 'XD',\n",
"    'stary', 'staremu',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 539,
"metadata": {},
"outputs": [],
"source": [
"# Keywords whose presence in a text subtracts one point in predict(), i.e. pushes\n",
"# the prediction towards class 0.\n",
"f_vocabulary = [\n",
"    'zakupy', 'zakupach',\n",
"    'mężem', 'mąż',\n",
"    'nasze', 'my',\n",
"    'dzieckiem', 'dziecko',\n",
"    'domu', 'dom',\n",
"    'mieszkaniu', 'mieszkanie',\n",
"    'kocham', 'kocha',\n",
"    # Was misspelled 'chłopakowai', which is not a form of 'chłopak'.\n",
"    'chłopakowi', 'chłopak',\n",
"    'haha', 'boże', 'uh', 'uhh', ':)',\n",
"    'mama', 'mamie',\n",
"    'włosy',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 540,
"metadata": {},
"outputs": [],
"source": [
"def predict(text):\n",
"    \"\"\"Classify ``text`` as 1 or 0 by counting keyword hits.\n",
"\n",
"    Every entry of ``m_vocabulary`` found in ``text`` adds one point and every\n",
"    entry of ``f_vocabulary`` found in ``text`` subtracts one.\n",
"\n",
"    Returns 1 for a positive total, 0 for a negative total, and the result of\n",
"    ``random.randint(0, 1)`` when the total is exactly zero.\n",
"\n",
"    NOTE(review): when ``text`` is a str, ``in`` is a substring test, so short\n",
"    entries such as 'e' or 'co' also match inside longer words - confirm that\n",
"    this is intended.\n",
"    \"\"\"\n",
"    m_hits = sum(1 for keyword in m_vocabulary if keyword in text)\n",
"    f_hits = sum(1 for keyword in f_vocabulary if keyword in text)\n",
"    balance = m_hits - f_hits\n",
"\n",
"    if balance > 0:\n",
"        return 1\n",
"    if balance < 0:\n",
"        return 0\n",
"    # Tie between both keyword lists: fall back to a random label.\n",
"    return random.randint(0, 1)"
]
},
{
"cell_type": "code",
"execution_count": 541,
"metadata": {},
"outputs": [],
"source": [
"# Read the dev-0 gold labels as a list of lines; each line is later parsed with int().\n",
"# Explicit encoding so the read does not depend on the platform default.\n",
"with open('dev-0/expected.tsv', 'r', encoding='utf-8') as f:\n",
"    dev_y = f.readlines()"
]
},
{
"cell_type": "code",
"execution_count": 542,
"metadata": {},
"outputs": [],
"source": [
"# Pair every input line with its gold label.\n",
"# zip() silently stops at the shorter sequence, so a length mismatch is checked first.\n",
"assert len(dev_x) == len(dev_y), 'dev-0 inputs and labels differ in length'\n",
"# Stored as a list: a bare zip object is exhausted after one pass, which would\n",
"# leave nothing to iterate if the evaluation cell is run a second time.\n",
"dev = list(zip(dev_x, dev_y))"
]
},
{
"cell_type": "code",
"execution_count": 543,
"metadata": {},
"outputs": [],
"source": [
"# Write one prediction per line to dev-0/out.tsv and count how many match the labels.\n",
"correct = 0\n",
"incorrect = 0\n",
"\n",
"with open('dev-0/out.tsv', 'wt') as f:\n",
"    for x, y in dev:\n",
"        # Predict exactly once per example: predict() breaks ties randomly, so a\n",
"        # second call could return a different label than the one written out.\n",
"        prediction = predict(x)\n",
"        f.write(str(prediction) + '\\n')\n",
"        if prediction == int(y):\n",
"            correct += 1\n",
"        else:\n",
"            incorrect += 1"
]
},
{
"cell_type": "code",
"execution_count": 544,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.5122792230182751"
]
},
"execution_count": 544,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Accuracy on dev-0: matching predictions divided by all scored examples.\n",
"correct / (correct + incorrect)"
]
}
],
"metadata": {
"interpreter": {
"hash": "8a24ca87d97ac268fc796e79e77f73ca37fd3e060a17758a6f2d8f8d4f13ae6a"
},
"kernelspec": {
"display_name": "Python 3.9.7 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}