{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce420679-f5aa-4c83-a912-3c4afa982d7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "# Dataset locations, relative to the notebook's working directory.\n",
    "TRAIN_PATH = 'train/train.tsv'\n",
    "DEV_IN_PATH = 'dev-0/in.tsv'\n",
    "DEV_OUT_PATH = 'dev-0/out.tsv'\n",
    "TEST_IN_PATH = 'test-A/in.tsv'\n",
    "TEST_OUT_PATH = 'test-A/out.tsv'\n",
    "\n",
    "\n",
    "def read_documents(path):\n",
    "    \"\"\"Return every physical line of `path` as one document in a Series.\n",
    "\n",
    "    Reading line by line (instead of `pd.read_csv`) guarantees that no input\n",
    "    row is skipped or merged by the CSV parser, so the predictions written\n",
    "    later stay aligned one-to-one with the input lines.\n",
    "    \"\"\"\n",
    "    with open(path, 'r', encoding='utf8') as file:\n",
    "        return pd.Series(file.readlines())\n",
    "\n",
    "\n",
    "def write_predictions(path, predictions):\n",
    "    \"\"\"Write `predictions` to `path`, one integer label per line.\"\"\"\n",
    "    labels = pd.Series(predictions).astype('int')\n",
    "    with open(path, 'wt', encoding='utf8') as file:\n",
    "        for label in labels:\n",
    "            file.write(str(label) + '\\n')\n",
    "\n",
    "\n",
    "# Training data: column 0 holds the label and column 1 the text.\n",
    "# `on_bad_lines='warn'` replaces the deprecated `error_bad_lines=False`:\n",
    "# malformed rows are still skipped and reported.\n",
    "train_df = pd.read_csv(TRAIN_PATH, sep='\\t', header=None, on_bad_lines='warn')\n",
    "x = train_df[1]\n",
    "y = train_df[0]\n",
    "\n",
    "dev_docs = read_documents(DEV_IN_PATH)\n",
    "test_docs = read_documents(TEST_IN_PATH)\n",
    "\n",
    "# TF-IDF features feeding a multinomial Naive Bayes classifier.\n",
    "model = make_pipeline(TfidfVectorizer(), MultinomialNB())\n",
    "model.fit(x, y)\n",
    "\n",
    "pred_dev = model.predict(dev_docs)\n",
    "write_predictions(DEV_OUT_PATH, pred_dev)\n",
    "\n",
    "pred_test = model.predict(test_docs)\n",
    "write_predictions(TEST_OUT_PATH, pred_test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}