160 lines
5.6 KiB
Plaintext
160 lines
5.6 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "516ddab1-dae1-4fb8-9aa0-1b3a953a3058",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# potrzebne importy\n",
|
||
|
"import random\n",
|
||
|
"from sklearn.metrics import accuracy_score"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"id": "b05f02a4-3730-4501-b0fc-9534445bb54c",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"# search engine function\n",
"def search_engine(data_in, data_out):\n",
"    \"\"\"Label every line of `data_in` and write the labels to `data_out`.\n",
"\n",
"    A line gets \"1\" when it contains more entries of `man_dictionary` than of\n",
"    `woman_dictionary`, \"0\" in the opposite case, and a random 0/1 label when\n",
"    both counts are equal (which includes lines with no hit at all).\n",
"\n",
"    Both dictionaries are module-level names resolved at call time, so the\n",
"    cell that defines them must have been run before this function is called.\n",
"    Matching is a case-insensitive substring test.\n",
"\n",
"    Args:\n",
"        data_in: path of the input file, opened as UTF-8 and read line by line.\n",
"        data_out: path of the output file, written as UTF-8 with one label per line.\n",
"    \"\"\"\n",
"    def count_hits(text, words):\n",
"        # Substring test: an entry also counts when it occurs inside a longer word.\n",
"        return sum(1 for word in words if word in text)\n",
"\n",
"    result = []\n",
"    with open(data_in, \"r\", encoding=\"UTF-8\") as f:\n",
"        # Stream the file instead of loading all lines with readlines().\n",
"        for line in f:\n",
"            # All dictionary entries are lowercase, so fold the text as well.\n",
"            text = line.lower()\n",
"            man = count_hits(text, man_dictionary)\n",
"            woman = count_hits(text, woman_dictionary)\n",
"\n",
"            if man == woman:\n",
"                # Tie, including the 0 == 0 case: fall back to a coin flip.\n",
"                result.append(f\"{random.randint(0, 1)}\\n\")\n",
"            elif man > woman:\n",
"                result.append(\"1\\n\")\n",
"            else:\n",
"                result.append(\"0\\n\")\n",
"\n",
"    with open(data_out, \"w\", encoding=\"UTF-8\") as output:\n",
"        output.writelines(result)\n",
|
||
|
"\n",
|
||
"# function that checks the results\n",
"def check(result_path, expected_path):\n",
"    \"\"\"Print the accuracy of the labels in `result_path` against `expected_path`.\n",
"\n",
"    Both files are opened as UTF-8 and read line by line; surrounding\n",
"    whitespace is stripped from every line before the two lists are passed\n",
"    to `accuracy_score`.\n",
"    \"\"\"\n",
"    def read_labels(path):\n",
"        with open(path, \"r\", encoding='utf-8') as file:\n",
"            return [row.strip() for row in file]\n",
"\n",
"    # The expected file is opened first, then the produced one.\n",
"    expected = read_labels(expected_path)\n",
"    result = read_labels(result_path)\n",
"    print(accuracy_score(expected, result))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"id": "96cdc260-f007-4793-9406-076f995fd3a7",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"# keyword dictionaries\n",
"# Every entry must be followed by a comma: two adjacent string literals are\n",
"# silently concatenated by Python into a single (useless) keyword.\n",
"man_dictionary = ['samochód', 'auto', 'paliwo', 'paliwa', 'silnik', 'komputer', 'windows', 'pc', 'pad', 'gamepad', 'kierownica', 'opon', 'apple', 'samsung', 'system', 'sfd', 'kfd', 'elektroda', 'autocentrum',\n",
"    'dobreprogramy', 'youtube', 'audi', 'bmw', 'mercedes', 'porsche', 'lamborghini', 'poszedłem', 'zrobiłem', 'byłem', 'pojechałem', 'zjadłem', 'widziałem', 'grałem', 'zagrałem', 'mecz', 'piłka', 'nożna',\n",
"    'piłki', 'koszykówka', 'tenis', 'siłownia', 'biceps', 'triceps', 'hamulec', 'boisko', 'lech', 'legia', 'real', 'barcelona', 'borussia', 'bayern', 'stadion', 'piwo',\n",
"    'wódka', 'whisky', 'whiskey', 'kieliszek', 'wóda', 'kac', 'piwko']\n",
"\n",
"woman_dictionary = ['okres', 'miesiączka', 'miesiaczka', 'miesiączki', 'krwawienie', 'podpaska', 'podpaski', 'tampony', 'tampon', 'związek', 'związku', 'chłopak', 'mąż', 'miłość', 'ciąża', 'ciąży', 'ciążę', 'tabletki',\n",
"    'antykoncepcyjne', 'antykoncepcja', 'hormony', 'dziecko', 'poród', 'rodzić', 'kuchnia', 'kuchnii', 'ciasto', 'przepis', 'rodzic', 'rodzice', 'ubranka', 'poszłam', 'zrobiłam', 'byłam', 'pojechałam', 'ugotowałam', 'zjadłam', 'szafa',\n",
"    'fryzura', 'pomadka', 'błyszczyk', 'tusz', 'rzęsy', 'brwi', 'brązer', 'opalenizna', 'rozstępy', 'zerwanie', 'przeszłość', 'prostownica', 'suszarka', 'buty', 'sandałki', 'ginekolog',\n",
"    'biustonosz', 'depilacja', 'bikini', 'włosy', 'pielęgnacja', 'samotność', 'kolczyki', 'lakier', 'lokówka', 'peeling', 'outfit', 'świece', 'podkład', 'makijaż', 'perfumy', 'kompleks', 'odżywka',\n",
"    'sukienka', 'spódniczka', 'bielizna', 'rajstopy', 'wino']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"id": "b00b5bec-5af4-4f4e-907d-0a62ab300e97",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"# Seed the RNG so the random tie-break labels are the same on every run.\n",
"random.seed(42)\n",
"\n",
"# Forward slashes are accepted by open() on Windows as well as on POSIX\n",
"# systems, and they avoid invalid escape sequences in the string literals.\n",
"for split in (\"dev-0\", \"dev-1\", \"test-A\"):\n",
"    search_engine(f\"{split}/in.tsv\", f\"{split}/out.tsv\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "555418b4-1d38-4491-815d-be95b6eb5b35",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"0.519051225657981\n",
|
||
|
"0.5157912212814324\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
"# check(\"dev-0/out.tsv\", \"dev-0/expected.tsv\")\n",
"# check(\"dev-1/out.tsv\", \"dev-1/expected.tsv\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"id": "3e9e2af3-e559-4e5e-a76b-f6520d00025b",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[NbConvertApp] Converting notebook run.ipynb to script\n",
|
||
|
"[NbConvertApp] Writing 3371 bytes to run.py\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!jupyter nbconvert --to script run.ipynb"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.9.7"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|